diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..6cc1ff3fcb994bd2d321a58af11862aaae88e1db
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,35 @@
+FROM python:3.9
+
+# Install ffmpeg, and drop the apt package lists in the same layer so they are
+# not stored in the image.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ffmpeg && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN python3 -m pip install --no-cache-dir --upgrade pip
+
+# Python installation (WORKDIR also makes the RUN commands below execute here)
+WORKDIR /usr/src/app
+
+# Install the Python requirements before copying the source, so that this layer
+# is reused from the build cache when only the source changes.
+COPY requirements.txt /usr/src/app/requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Copy source
+COPY setup.py /usr/src/app/setup.py
+COPY whisper_timestamped /usr/src/app/whisper_timestamped
+
+# Install the package with its dev, vad_silero, vad_auditok and test extras in a
+# single pip invocation.
+RUN pip3 install --no-cache-dir ".[dev,vad_silero,vad_auditok,test]"
+
+# Cleanup of the working directory. The removed files remain in the COPY layers
+# above, so this does not reduce the image size.
+RUN rm -R /usr/src/app/requirements.txt /usr/src/app/setup.py /usr/src/app/whisper_timestamped
+
+# Copy tests
+COPY tests /usr/src/app/tests
+
+ENTRYPOINT ["/bin/bash"]
\ No newline at end of file
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
new file mode 100644
index 0000000000000000000000000000000000000000..dceffbfc595003c75680694d24e4de72536559c7
--- /dev/null
+++ b/Dockerfile.cpu
@@ -0,0 +1,40 @@
+FROM python:3.9
+
+# Install ffmpeg, and drop the apt package lists in the same layer so they are
+# not stored in the image.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ffmpeg && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN python3 -m pip install --no-cache-dir --upgrade pip
+
+# Python installation (WORKDIR also makes the RUN commands below execute here)
+WORKDIR /usr/src/app
+
+# Force CPU versions of torch
+RUN pip3 install --no-cache-dir \
+    torch==1.13.1+cpu \
+    torchaudio==0.13.1+cpu \
+    -f https://download.pytorch.org/whl/torch_stable.html
+
+# Install the Python requirements before copying the source, so that this layer
+# is reused from the build cache when only the source changes.
+COPY requirements.txt /usr/src/app/requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Copy source
+COPY setup.py /usr/src/app/setup.py
+COPY whisper_timestamped /usr/src/app/whisper_timestamped
+
+# Install
+RUN pip3 install --no-cache-dir ".[dev]"
+
+# Cleanup of the working directory. The removed files remain in the COPY layers
+# above, so this does not reduce the image size.
+RUN rm -R /usr/src/app/requirements.txt /usr/src/app/setup.py /usr/src/app/whisper_timestamped
+
+# Copy tests
+COPY tests /usr/src/app/tests
+
+ENTRYPOINT ["/bin/bash"]
\ No newline at end of file
diff --git a/LICENCE b/LICENCE new file mode 100644 index 0000000000000000000000000000000000000000..0ad25db4bd1d86c452db3f9602ccdbe172438f52 --- /dev/null +++ b/LICENCE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. 
+ + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". 
"Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. 
Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code.
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +<https://www.gnu.org/licenses/>. diff --git a/README.md b/README.md index b395ff064c4df0831530d702e0288acad5833a67..3dd9f012eba8e700398aa4bd4a12d6e8a01d198e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,416 @@ ---- -license: agpl-3.0 ---- +# whisper-timestamped + +Multilingual Automatic Speech Recognition with word-level timestamps and confidence. + +* [Description](#description) + * [Notes on other approaches](#notes-on-other-approaches) +* [Installation](#installation) + * [First installation](#first-installation) + * [Additional packages that might be needed](#additional-packages-that-might-be-needed) + * [Docker](#docker) + * [Light installation for CPU](#light-installation-for-cpu) + * [Upgrade to the latest version](#upgrade-to-the-latest-version) +* [Usage](#usage) + * [Python](#python) + * [Command line](#command-line) + * [Plot of word alignment](#plot-of-word-alignment) + * [Example output](#example-output) + * [Options that may improve results](#options-that-may-improve-results) + * [Accurate Whisper transcription](#accurate-whisper-transcription) + * [Running Voice Activity Detection (VAD) before sending to Whisper](#running-voice-activity-detection-vad-before-sending-to-whisper) + * [Detecting disfluencies](#detecting-disfluencies) +* [Acknowledgment](#acknowlegment) +* [Citations](#citations) + +## Description + +[Whisper](https://openai.com/blog/whisper/) is a set of multi-lingual, robust speech recognition models trained by OpenAI that achieve state-of-the-art results in many languages.
Whisper models were trained to predict approximate timestamps on speech segments (most of the time with 1-second accuracy), but they cannot originally predict word timestamps. This repository proposes an implementation to **predict word timestamps and provide a more accurate estimation of speech segments when transcribing with Whisper models**. +Besides, a confidence score is assigned to each word and each segment. + +The approach is based on Dynamic Time Warping (DTW) applied to cross-attention weights, as demonstrated by [this notebook by Jong Wook Kim](https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/notebooks/Multilingual_ASR.ipynb). There are some additions to this notebook: +* The start/end estimation is more accurate. +* Confidence scores are assigned to each word. +* **If possible (without beam search...)**, no additional inference steps are required to predict word timestamps (word alignment is done on the fly after each speech segment is decoded). +* Special care has been taken regarding memory usage: `whisper-timestamped` is able to process long files with little additional memory compared to the regular use of the Whisper model. + +`whisper-timestamped` is an extension of the [`openai-whisper`](https://pypi.org/project/openai-whisper/) Python package and is meant to be compatible with any version of `openai-whisper`. +It provides more efficient/accurate word timestamps, along with these additional features: +* Voice Activity Detection (VAD) can be run before applying the Whisper model, + to avoid hallucinations due to errors in the training data (for instance, predicting "Thanks you for watching!" on pure silence). + Several VAD methods are available: silero (default), silero:v3.1, auditok. +* When the language is not specified, the language probabilities are provided among the outputs.
+ +### Notes on other approaches + +An alternative relevant approach to recovering word-level timestamps involves using wav2vec models that predict characters, as successfully implemented in [whisperX](https://github.com/m-bain/whisperX). However, these approaches have several drawbacks that are not present in approaches based on cross-attention weights such as `whisper_timestamped`. These drawbacks include: +* The need to find one wav2vec model per language to support, which does not scale well with the multi-lingual capabilities of Whisper. +* The need to handle (at least) one additional neural network (wav2vec model), which consumes memory. +* The need to normalize characters in Whisper transcription to match the character set of the wav2vec model. This involves awkward language-dependent conversions, such as converting numbers to words ("2" -> "two"), symbols to words ("%" -> "percent", "€" -> "euro(s)")... +* The lack of robustness around speech disfluencies (fillers, hesitations, repeated words...) that are usually removed by Whisper. + +An alternative approach that does not require an additional model is to look at the probabilities of timestamp tokens estimated by the Whisper model after each (sub)word token is predicted. This was implemented, for instance, in whisper.cpp and stable-ts. However, this approach lacks robustness because Whisper models have not been trained to output meaningful timestamps after each word. Whisper models tend to predict timestamps only after a certain number of words have been predicted (typically at the end of a sentence), and the probability distribution of timestamps outside this condition may be inaccurate. In practice, these methods can produce results that are totally out-of-sync on some periods of time (we observed this especially when there is jingle music). 
Also, the timestamp precision of Whisper models tends to be rounded to 1 second (as in many video subtitles), which is too inaccurate for words, and reaching better accuracy is tricky. + +## Installation + +### First installation + +Requirements: +* `python3` (version higher or equal to 3.7, at least 3.9 is recommended) +* `ffmpeg` (see instructions for installation on the [whisper repository](https://github.com/openai/whisper)) + +You can install `whisper-timestamped` either by using pip: +```bash +pip3 install whisper-timestamped +``` + +or by cloning this repository and running installation: +```bash +git clone https://github.com/linto-ai/whisper-timestamped +cd whisper-timestamped/ +python3 setup.py install +``` + +#### Additional packages that might be needed + +If you want to plot alignment between audio timestamps and words (as in [this section](#plot-of-word-alignment)), you also need matplotlib: +```bash +pip3 install matplotlib +``` + +If you want to use VAD option (Voice Activity Detection before running Whisper model), you also need torchaudio and onnxruntime: +```bash +pip3 install onnxruntime torchaudio +``` + +If you want to use finetuned Whisper models from the Hugging Face Hub, you also need transformers: +```bash +pip3 install transformers +``` + +#### Docker + +A docker image of about 9GB can be built using: +```bash +git clone https://github.com/linto-ai/whisper-timestamped +cd whisper-timestamped/ +docker build -t whisper_timestamped:latest . +``` + +### Light installation for CPU + +If you don't have a GPU (or don't want to use it), then you don't need to install the CUDA dependencies. 
You should then just install a light version of torch **before** installing whisper-timestamped, for instance as follows: +```bash +pip3 install \ + torch==1.13.1+cpu \ + torchaudio==0.13.1+cpu \ + -f https://download.pytorch.org/whl/torch_stable.html +``` + +A specific docker image of about 3.5GB can also be built using: +```bash +git clone https://github.com/linto-ai/whisper-timestamped +cd whisper-timestamped/ +docker build -t whisper_timestamped_cpu:latest -f Dockerfile.cpu . +``` + +### Upgrade to the latest version + +When using pip, the library can be updated to the latest version using: +```bash +pip3 install --upgrade --no-deps --force-reinstall git+https://github.com/linto-ai/whisper-timestamped +``` + +A specific version of `openai-whisper` can be used by running, for example: +```bash +pip3 install openai-whisper==20230124 +``` + +## Usage + +### Python + +In Python, you can use the function `whisper_timestamped.transcribe()`, which is similar to the function `whisper.transcribe()`: +```python +import whisper_timestamped +help(whisper_timestamped.transcribe) +``` +The main difference with `whisper.transcribe()` is that the output will include a key `"words"` for all segments, with the word start and end position. Note that the word will include punctuation. See the example [below](#example-output). + +Besides, the default decoding options are different to favour efficient decoding (greedy decoding instead of beam search, and no temperature sampling fallback). To get the same defaults as in `whisper`, use ```beam_size=5, best_of=5, temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0)```. + +There are also additional options related to word alignment.
+ +In general, if you import `whisper_timestamped` instead of `whisper` in your Python script and use `transcribe(model, ...)` instead of `model.transcribe(...)`, it should do the job: +``` +import whisper_timestamped as whisper + +audio = whisper.load_audio("AUDIO.wav") + +model = whisper.load_model("tiny", device="cpu") + +result = whisper.transcribe(model, audio, language="fr") + +import json +print(json.dumps(result, indent = 2, ensure_ascii = False)) +``` + +Note that you can use a finetuned Whisper model from HuggingFace or a local folder by using the `load_model` method of `whisper_timestamped`. For instance, if you want to use [whisper-large-v2-nob](https://huggingface.co/NbAiLab/whisper-large-v2-nob), you can simply do the following: +``` +import whisper_timestamped as whisper + +model = whisper.load_model("NbAiLab/whisper-large-v2-nob", device="cpu") + +# ... +``` + +### Command line + +You can also use `whisper_timestamped` on the command line, similarly to `whisper`. See help with: +```bash +whisper_timestamped --help +``` + +The main differences with `whisper` CLI are: +* Output files: + * The output JSON contains word timestamps and confidence scores. See example [below](#example-output). + * There is an additional CSV output format. + * For SRT, VTT, TSV formats, there will be additional files saved with word timestamps. +* Some default options are different: + * By default, no output folder is set: Use `--output_dir .` for Whisper default. + * By default, there is no verbose: Use `--verbose True` for Whisper default. + * By default, beam search decoding and temperature sampling fallback are disabled, to favour an efficient decoding. + To set the same as Whisper default, you can use `--accurate` (which is an alias for ```--beam_size 5 --temperature_increment_on_fallback 0.2 --best_of 5```). +* There are some additional specific options: + + * `--compute_confidence` to enable/disable the computation of confidence scores for each word. 
+ * `--punctuations_with_words` to decide whether punctuation marks should be included or not with preceding words. + +An example command to process several files using the `tiny` model and output the results in the current folder, as would be done by default with whisper, is as follows: +``` +whisper_timestamped audio1.flac audio2.mp3 audio3.wav --model tiny --output_dir . +``` + +Note that you can use a fine-tuned Whisper model from HuggingFace or a local folder. For instance, if you want to use the [whisper-large-v2-nob](https://huggingface.co/NbAiLab/whisper-large-v2-nob) model, you can simply do the following: +``` +whisper_timestamped --model NbAiLab/whisper-large-v2-nob <...> +``` + +### Plot of word alignment + +Note that you can use the `plot_word_alignment` option of the `whisper_timestamped.transcribe()` Python function or the `--plot` option of the `whisper_timestamped` CLI to see the word alignment for each segment. + +![Example alignement](figs/example_alignement_plot.png) + +* The upper plot represents the transformation of cross-attention weights used for alignment with Dynamic Time Warping. The abscissa represents time, and the ordinate represents the predicted tokens, with special timestamp tokens at the beginning and end, and (sub)words and punctuation in the middle. +* The lower plot is an MFCC representation of the input signal (features used by Whisper, based on Mel-frequency cepstrum). +* The vertical dotted red lines show where the word boundaries are found (with punctuation marks "glued" to the previous word). + +### Example output + +The output of `whisper_timestamped.transcribe()` function is a python dictionary, +which can be viewed in JSON format using the CLI. + +The JSON schema can be seen in [tests/json_schema.json](tests/json_schema.json). + +Here is an example output: +```bash +whisper_timestamped AUDIO_FILE.wav --model tiny --language fr +``` +```json +{ + "text": " Bonjour! 
Est-ce que vous allez bien?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.5, + "end": 1.2, + "text": " Bonjour!", + "tokens": [ 25431, 2298 ], + "temperature": 0.0, + "avg_logprob": -0.6674491882324218, + "compression_ratio": 0.8181818181818182, + "no_speech_prob": 0.10241222381591797, + "confidence": 0.51, + "words": [ + { + "text": "Bonjour!", + "start": 0.5, + "end": 1.2, + "confidence": 0.51 + } + ] + }, + { + "id": 1, + "seek": 200, + "start": 2.02, + "end": 4.48, + "text": " Est-ce que vous allez bien?", + "tokens": [ 50364, 4410, 12, 384, 631, 2630, 18146, 3610, 2506, 50464 ], + "temperature": 0.0, + "avg_logprob": -0.43492694334550336, + "compression_ratio": 0.7714285714285715, + "no_speech_prob": 0.06502953916788101, + "confidence": 0.595, + "words": [ + { + "text": "Est-ce", + "start": 2.02, + "end": 3.78, + "confidence": 0.441 + }, + { + "text": "que", + "start": 3.78, + "end": 3.84, + "confidence": 0.948 + }, + { + "text": "vous", + "start": 3.84, + "end": 4.0, + "confidence": 0.935 + }, + { + "text": "allez", + "start": 4.0, + "end": 4.14, + "confidence": 0.347 + }, + { + "text": "bien?", + "start": 4.14, + "end": 4.48, + "confidence": 0.998 + } + ] + } + ], + "language": "fr" +} +``` +If the language is not specified (e.g. without option `--language fr` in the CLI) you will find an additional key with the language probabilities: +```json +{ + ... + "language": "fr", + "language_probs": { + "en": 0.027954353019595146, + "zh": 0.02743500843644142, + ... + "fr": 0.9196318984031677, + ... + "su": 3.0119704064190955e-08, + "yue": 2.2565967810805887e-05 + } +} +``` + +### Options that may improve results + +Here are some options that are not enabled by default but might improve results. + +#### Accurate Whisper transcription + +As mentioned earlier, some decoding options are disabled by default to offer better efficiency. However, this can impact the quality of the transcription. 
To run with the options that have the best chance of providing a good transcription, use the following options. +* In Python: +```python +results = whisper_timestamped.transcribe(model, audio, beam_size=5, best_of=5, temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0), ...) +``` +* On the command line: +```bash +whisper_timestamped --accurate ... +``` + +#### Running Voice Activity Detection (VAD) before sending to Whisper + +Whisper models can "hallucinate" text when given a segment without speech. This can be avoided by running VAD and gluing speech segments together before transcribing with the Whisper model. This is possible with `whisper-timestamped`. +* In Python: +```python +results = whisper_timestamped.transcribe(model, audio, vad=True, ...) +``` +* On the command line: +```bash +whisper_timestamped --vad True ... +``` + +By default, the VAD method used is [silero](https://github.com/snakers4/silero-vad). +But other methods are available, such as earlier versions of silero, or [auditok](https://github.com/amsehili/auditok). +Those methods were introduced because latest versions of silero VAD can have a lot of false alarms on some audios (speech detected on silence). +* In Python: +```python +results = whisper_timestamped.transcribe(model, audio, vad="silero:v3.1", ...) +results = whisper_timestamped.transcribe(model, audio, vad="auditok", ...) +``` +* On the command line: +```bash +whisper_timestamped --vad silero:v3.1 ... +whisper_timestamped --vad auditok ... +``` + +In order to watch the VAD results, you can use the `--plot` option of the `whisper_timestamped` CLI, +or the `plot_word_alignment` option of the `whisper_timestamped.transcribe()` Python function. 
+It will show the VAD results on the input audio signal as follows (x-axis is time in seconds): +| **vad="silero:v4.0"** | **vad="silero:v3.1"** | **vad="auditok"** | +| :---: | :---: | :---: | +| ![Example VAD](figs/VAD_silero_v4.0.png) | ![Example VAD](figs/VAD_silero_v3.1.png) | ![Example VAD](figs/VAD_auditok.png) | + +#### Detecting disfluencies + +Whisper models tend to remove speech disfluencies (filler words, hesitations, repetitions, etc.). Without precautions, the disfluencies that are not transcribed will affect the timestamp of the following word: the timestamp of the beginning of the word will actually be the timestamp of the beginning of the disfluencies. `whisper-timestamped` can apply some heuristics to avoid this. +* In Python: +```python +results = whisper_timestamped.transcribe(model, audio, detect_disfluencies=True, ...) +``` +* On the command line: +```bash +whisper_timestamped --detect_disfluencies True ... +``` +**Important:** Note that when using these options, possible disfluencies will appear in the transcription as a special "`[*]`" word. + + +## Acknowlegment +* [whisper](https://github.com/openai/whisper): Whisper speech recognition (License MIT). +* [dtw-python](https://pypi.org/project/dtw-python): Dynamic Time Warping (License GPL v3).
+ +## Citations +If you use this in your research, please cite the repo: + +```bibtex +@misc{lintoai2023whispertimestamped, + title={whisper-timestamped}, + author={Louradour, J{\'e}r{\^o}me}, + journal={GitHub repository}, + year={2023}, + publisher={GitHub}, + howpublished = {\url{https://github.com/linto-ai/whisper-timestamped}} +} +``` + +as well as the OpenAI Whisper paper: + +```bibtex +@article{radford2022robust, + title={Robust speech recognition via large-scale weak supervision}, + author={Radford, Alec and Kim, Jong Wook and Xu, Tao and Brockman, Greg and McLeavey, Christine and Sutskever, Ilya}, + journal={arXiv preprint arXiv:2212.04356}, + year={2022} +} +``` + +and this paper for Dynamic-Time-Warping: + +```bibtex +@article{JSSv031i07, + title={Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package}, + author={Giorgino, Toni}, + journal={Journal of Statistical Software}, + year={2009}, + volume={31}, + number={7}, + doi={10.18637/jss.v031.i07} +} +``` diff --git a/figs/VAD_auditok.png b/figs/VAD_auditok.png new file mode 100644 index 0000000000000000000000000000000000000000..9dbaa4adc16a3082df4a6b17ddb3e4dbc0d4c227 Binary files /dev/null and b/figs/VAD_auditok.png differ diff --git a/figs/VAD_silero_v3.1.png b/figs/VAD_silero_v3.1.png new file mode 100644 index 0000000000000000000000000000000000000000..5216ef11328b5437fe715c63f66c0b095e0442d8 Binary files /dev/null and b/figs/VAD_silero_v3.1.png differ diff --git a/figs/VAD_silero_v4.0.png b/figs/VAD_silero_v4.0.png new file mode 100644 index 0000000000000000000000000000000000000000..3b516f9a5ac8eebf05688f7689ae49ae0dc4f24b Binary files /dev/null and b/figs/VAD_silero_v4.0.png differ diff --git a/figs/example_alignement_plot.png b/figs/example_alignement_plot.png new file mode 100644 index 0000000000000000000000000000000000000000..504528ece2e36b2cec4990ccdfcdc1d771a14beb Binary files /dev/null and b/figs/example_alignement_plot.png differ diff --git a/requirements.txt 
b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f5a15670950a2dca208b867d00ff408e2cd577f3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +Cython +dtw-python +openai-whisper \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..d3e55caa5f5f0e202f9ce77afea457b8bbd60a46 --- /dev/null +++ b/setup.py @@ -0,0 +1,58 @@ +import os + +from setuptools import setup, find_packages + +install_requires = [ + "Cython", + "dtw-python", + "openai-whisper", +] + +required_packages_filename = os.path.join(os.path.dirname(__file__), "requirements.txt") +if os.path.exists(required_packages_filename): + install_requires2 = [l.strip() for l in open(required_packages_filename).readlines()] + assert install_requires == install_requires2, f"requirements.txt is not up-to-date: {install_requires} != {install_requires2}" + +version = None +license = None +with open(os.path.join(os.path.dirname(__file__), "whisper_timestamped", "transcribe.py")) as f: + for line in f: + if line.strip().startswith("__version__"): + version = line.split("=")[1].strip().strip("\"'") + if version and license: + break + if line.strip().startswith("__license__"): + license = line.split("=")[1].strip().strip("\"'") + if version and license: + break +assert version and license + +description="Multi-lingual Automatic Speech Recognition (ASR) based on Whisper models, with accurate word timestamps, access to language detection confidence, several options for Voice Activity Detection (VAD), and more." 
+ +setup( + name="whisper-timestamped", + py_modules=["whisper_timestamped"], + version=version, + description=description, + long_description=description+"\nSee https://github.com/linto-ai/whisper-timestamped for more information.", + long_description_content_type='text/markdown', + python_requires=">=3.7", + author="Jeronymous", + url="https://github.com/linto-ai/whisper-timestamped", + license=license, + packages=find_packages(exclude=["tests*"]), + install_requires=install_requires, + entry_points = { + 'console_scripts': [ + 'whisper_timestamped=whisper_timestamped.transcribe:cli', + 'whisper_timestamped_make_subtitles=whisper_timestamped.make_subtitles:cli' + ], + }, + include_package_data=True, + extras_require={ + 'dev': ['matplotlib==3.7.4', 'transformers'], + 'vad_silero': ['onnxruntime', 'torchaudio'], + 'vad_auditok': ['auditok'], + 'test': ['jsonschema'], + }, +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/data/apollo11.mp3 b/tests/data/apollo11.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..d9246ca9f73701df5270dd342bdca2d316eda5b3 Binary files /dev/null and b/tests/data/apollo11.mp3 differ diff --git a/tests/data/bonjour.wav b/tests/data/bonjour.wav new file mode 100644 index 0000000000000000000000000000000000000000..f03944e35c448f2226923356f7208d0234a6419a Binary files /dev/null and b/tests/data/bonjour.wav differ diff --git a/tests/data/bonjour_vous_allez_bien.mp3 b/tests/data/bonjour_vous_allez_bien.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..53bd078e6610cca0bddf7dbcaff2b2a991028c51 Binary files /dev/null and b/tests/data/bonjour_vous_allez_bien.mp3 differ diff --git a/tests/data/empty.mp3 b/tests/data/empty.mp3 new file mode 100644 index 
0000000000000000000000000000000000000000..a73b2f2f14d2f33beb872041f64abc0208eee70b Binary files /dev/null and b/tests/data/empty.mp3 differ diff --git a/tests/data/empty.wav b/tests/data/empty.wav new file mode 100644 index 0000000000000000000000000000000000000000..a15eb0580e1a5cd7cd4b4d3e610155a3a967c81b Binary files /dev/null and b/tests/data/empty.wav differ diff --git a/tests/data/gaenswein15.mp3 b/tests/data/gaenswein15.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..48689cfe8af4a3873f7c988793bd48ab12523094 Binary files /dev/null and b/tests/data/gaenswein15.mp3 differ diff --git a/tests/data/gloria.mp3 b/tests/data/gloria.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..8289f3989deb8bccd19b102b9530974600791798 Binary files /dev/null and b/tests/data/gloria.mp3 differ diff --git a/tests/data/japanese.mp3 b/tests/data/japanese.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..10337faea507f73c0add491837ad997e5656741a Binary files /dev/null and b/tests/data/japanese.mp3 differ diff --git a/tests/data/laugh1.mp3 b/tests/data/laugh1.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c564abe8c4f87cbaa1ee1998f17a36a1592f5c61 Binary files /dev/null and b/tests/data/laugh1.mp3 differ diff --git a/tests/data/laugh2.mp3 b/tests/data/laugh2.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..b8fb8840f31d416eab241969a1b5f2da067a6acf Binary files /dev/null and b/tests/data/laugh2.mp3 differ diff --git a/tests/data/no_punctuations.mp3.words.json b/tests/data/no_punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..46d20abcf33869ed2f4eda32988e19619cfaf7b4 --- /dev/null +++ b/tests/data/no_punctuations.mp3.words.json @@ -0,0 +1,68 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.4, + "end": 2.34, + "text": " Dis-moi, est-ce que l'avion vole?", + "tokens": [ + 
50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 6, + 706, + 313, + 49877, + 2506, + 50494 + ], + "temperature": 0.0, + "avg_logprob": -0.3014036907869227, + "compression_ratio": 0.8048780487804879, + "no_speech_prob": 0.05134102329611778, + "confidence": 0.925, + "words": [ + { + "text": "Dis-moi", + "start": 0.4, + "end": 1.36, + "confidence": 0.801 + }, + { + "text": "est-ce", + "start": 1.36, + "end": 1.52, + "confidence": 0.966 + }, + { + "text": "que", + "start": 1.52, + "end": 1.64, + "confidence": 0.977 + }, + { + "text": "l'avion", + "start": 1.64, + "end": 2.02, + "confidence": 0.994 + }, + { + "text": "vole", + "start": 2.02, + "end": 2.34, + "confidence": 0.891 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/data/punctuations.mp3 b/tests/data/punctuations.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..b797032d15a7fc9cd575f79bd6ec43e37e66e555 Binary files /dev/null and b/tests/data/punctuations.mp3 differ diff --git a/tests/data/radio_short.mp3 b/tests/data/radio_short.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..1772d945c050cb88948fb3b4d7f84657e06a90d7 Binary files /dev/null and b/tests/data/radio_short.mp3 differ diff --git a/tests/data/smartphone.mp3 b/tests/data/smartphone.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..9c1c12473bf6c5485426a4f563b1385fd039f243 Binary files /dev/null and b/tests/data/smartphone.mp3 differ diff --git a/tests/data/smartphone.mp3.words.json b/tests/data/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..8cf211f5b9abc1b522e115e190503add6af20a12 --- /dev/null +++ b/tests/data/smartphone.mp3.words.json @@ -0,0 +1,4802 @@ +{ + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça. 
Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions, mais la manière dont elles interagissent entre elles. Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces. L'écran tactile a été beaucoup très souvent mentionné. Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes. Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible. Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but. Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité. Mais ça, ça soulève une autre interrogation. Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit? Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone? Il n'y a pas d'équivalent en fait. Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant. Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet, d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet. Donc, à objet inédit, rapport inédit. Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment. 
Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre. On peut adorer sa bagnole, en avoir besoin pour plein de choses. Et bien, le soir, quand on va se coucher, on la laisse. On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes. On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui, continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate. Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi. Donc, rapport inédit. D'accord. Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais? Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux? Les économistes parlent de dépendance du sentier. 
C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 3.62, + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça.", + "tokens": [ + 383, + 6, + 377, + 20090, + 1078, + 1769, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 408, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.2362671656324374, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.935, + "words": [ + { + "text": "C'est", + "start": 0.38, + "end": 0.58, + "confidence": 0.961 + }, + { + "text": "évident", + "start": 0.58, + "end": 0.88, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 0.88, + "end": 1.02, + "confidence": 0.663 + }, + { + "text": "que", + "start": 1.02, + "end": 1.08, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.994 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.78, + "confidence": 0.91 + }, + { + "text": "mais", + "start": 1.78, + "end": 1.9, + "confidence": 0.979 + }, + { + "text": "je", + "start": 1.9, + "end": 2.24, + "confidence": 0.981 + }, + { + "text": "ne", + "start": 2.24, + "end": 2.34, + "confidence": 0.837 + }, + { + "text": "me", + "start": 2.34, + "end": 2.38, + "confidence": 0.821 + }, + { + "text": "l'étais", + "start": 2.38, + "end": 2.58, + "confidence": 0.971 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.84, + "confidence": 0.989 + }, + { + "text": "formulé", + "start": 2.84, + "end": 3.26, + "confidence": 0.908 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.42, + "confidence": 0.993 + }, + { + "text": "ça.", + "start": 3.42, + "end": 3.62, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.08, + "end": 7.92, + "text": " 
Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions,", + "tokens": [ + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 287, + 6, + 8476, + 449, + 2776, + 730, + 17290, + 3916, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.2362671656324374, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.93, + "words": [ + { + "text": "Ce", + "start": 4.08, + "end": 4.26, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 4.26, + "end": 4.34, + "confidence": 0.958 + }, + { + "text": "fait", + "start": 4.34, + "end": 4.48, + "confidence": 0.567 + }, + { + "text": "la", + "start": 4.48, + "end": 4.66, + "confidence": 0.972 + }, + { + "text": "force", + "start": 4.66, + "end": 5.0, + "confidence": 0.999 + }, + { + "text": "du", + "start": 5.0, + "end": 5.2, + "confidence": 0.996 + }, + { + "text": "smartphone,", + "start": 5.2, + "end": 5.88, + "confidence": 0.912 + }, + { + "text": "c'est", + "start": 5.88, + "end": 6.12, + "confidence": 0.879 + }, + { + "text": "pas", + "start": 6.12, + "end": 6.26, + "confidence": 0.991 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.52, + "confidence": 0.999 + }, + { + "text": "l'accumulation", + "start": 6.52, + "end": 7.38, + "confidence": 0.958 + }, + { + "text": "des", + "start": 7.38, + "end": 7.56, + "confidence": 0.983 + }, + { + "text": "fonctions,", + "start": 7.56, + "end": 7.92, + "confidence": 0.987 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.32, + "end": 10.88, + "text": " mais la manière dont elles interagissent entre elles.", + "tokens": [ + 2420, + 635, + 22267, + 9400, + 23576, + 728, + 559, + 25450, + 3962, + 23576, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.2362671656324374, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.977, + "words": [ + { + "text": "mais", + "start": 8.32, + "end": 8.44, + 
"confidence": 0.992 + }, + { + "text": "la", + "start": 8.44, + "end": 8.6, + "confidence": 0.995 + }, + { + "text": "manière", + "start": 8.6, + "end": 8.9, + "confidence": 0.999 + }, + { + "text": "dont", + "start": 8.9, + "end": 9.1, + "confidence": 0.978 + }, + { + "text": "elles", + "start": 9.1, + "end": 9.48, + "confidence": 0.967 + }, + { + "text": "interagissent", + "start": 9.48, + "end": 10.32, + "confidence": 0.964 + }, + { + "text": "entre", + "start": 10.32, + "end": 10.58, + "confidence": 0.955 + }, + { + "text": "elles.", + "start": 10.58, + "end": 10.88, + "confidence": 0.989 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.96, + "end": 13.0, + "text": " Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant.", + "tokens": [ + 8257, + 1956, + 6176, + 274, + 6, + 19400, + 1022, + 635, + 5052, + 11, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.2362671656324374, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.906, + "words": [ + { + "text": "Ce", + "start": 10.96, + "end": 11.16, + "confidence": 0.608 + }, + { + "text": "qui", + "start": 11.16, + "end": 11.22, + "confidence": 0.769 + }, + { + "text": "dit", + "start": 11.22, + "end": 11.4, + "confidence": 0.983 + }, + { + "text": "d'ailleurs", + "start": 11.4, + "end": 11.56, + "confidence": 0.985 + }, + { + "text": "sur", + "start": 11.56, + "end": 11.72, + "confidence": 0.48 + }, + { + "text": "la", + "start": 11.72, + "end": 11.78, + "confidence": 0.984 + }, + { + "text": "photo,", + "start": 11.78, + "end": 12.12, + "confidence": 0.994 + }, + { + "text": "c'est", + "start": 12.12, + "end": 12.2, + "confidence": 0.997 + }, + { + "text": "hyper", + "start": 12.2, + "end": 12.42, + "confidence": 0.993 + }, + { + "text": "convaincant.", + "start": 12.42, + "end": 13.0, + "confidence": 0.982 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.34, + "end": 16.02, + "text": " 
Alors évidemment, il faudrait ajouter les interfaces.", + "tokens": [ + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.2362671656324374, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.912, + "words": [ + { + "text": "Alors", + "start": 13.34, + "end": 13.62, + "confidence": 0.584 + }, + { + "text": "évidemment,", + "start": 13.62, + "end": 14.34, + "confidence": 0.832 + }, + { + "text": "il", + "start": 14.34, + "end": 14.38, + "confidence": 0.953 + }, + { + "text": "faudrait", + "start": 14.38, + "end": 14.74, + "confidence": 0.996 + }, + { + "text": "ajouter", + "start": 14.74, + "end": 15.16, + "confidence": 0.992 + }, + { + "text": "les", + "start": 15.16, + "end": 15.52, + "confidence": 0.985 + }, + { + "text": "interfaces.", + "start": 15.52, + "end": 16.02, + "confidence": 0.984 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.22, + "end": 19.36, + "text": " L'écran tactile a été beaucoup très souvent mentionné.", + "tokens": [ + 441, + 6, + 9062, + 4257, + 47319, + 257, + 8862, + 8796, + 5732, + 20847, + 2152, + 15055, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.2362671656324374, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.923, + "words": [ + { + "text": "L'écran", + "start": 16.22, + "end": 16.7, + "confidence": 0.996 + }, + { + "text": "tactile", + "start": 16.7, + "end": 17.06, + "confidence": 0.986 + }, + { + "text": "a", + "start": 17.06, + "end": 17.26, + "confidence": 0.98 + }, + { + "text": "été", + "start": 17.26, + "end": 17.88, + "confidence": 0.974 + }, + { + "text": "beaucoup", + "start": 17.88, + "end": 18.28, + "confidence": 0.976 + }, + { + "text": "très", + "start": 18.28, + "end": 18.62, + "confidence": 0.447 + }, + { + "text": "souvent", + "start": 18.62, + "end": 18.9, + "confidence": 0.996 + }, + { + "text": "mentionné.", + 
"start": 18.9, + "end": 19.36, + "confidence": 0.978 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 19.84, + "end": 25.26, + "text": " Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes.", + "tokens": [ + 6313, + 4428, + 11, + 1930, + 8487, + 1264, + 421, + 6, + 388, + 1740, + 642, + 6212, + 368, + 945, + 1567, + 17338, + 1512, + 358, + 1625, + 1512, + 4792, + 13923, + 2156, + 4666, + 6592, + 724, + 5714, + 1531, + 596, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.2362671656324374, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.92, + "words": [ + { + "text": "Mais", + "start": 19.84, + "end": 20.22, + "confidence": 0.943 + }, + { + "text": "bon,", + "start": 20.22, + "end": 20.52, + "confidence": 0.666 + }, + { + "text": "il", + "start": 20.52, + "end": 20.6, + "confidence": 0.99 + }, + { + "text": "faut", + "start": 20.6, + "end": 20.7, + "confidence": 0.99 + }, + { + "text": "dire", + "start": 20.7, + "end": 20.84, + "confidence": 0.995 + }, + { + "text": "qu'il", + "start": 20.84, + "end": 20.96, + "confidence": 0.88 + }, + { + "text": "profite", + "start": 20.96, + "end": 21.26, + "confidence": 0.995 + }, + { + "text": "aussi", + "start": 21.26, + "end": 21.68, + "confidence": 0.972 + }, + { + "text": "de", + "start": 21.68, + "end": 21.9, + "confidence": 0.97 + }, + { + "text": "20", + "start": 21.9, + "end": 22.1, + "confidence": 0.812 + }, + { + "text": "ans", + "start": 22.1, + "end": 22.32, + "confidence": 0.997 + }, + { + "text": "pendant", + "start": 22.32, + "end": 22.48, + "confidence": 0.723 + }, + { + "text": "lesquels", + "start": 22.48, + "end": 22.92, + "confidence": 0.98 + }, + { + "text": "les", + "start": 22.92, + "end": 23.04, + "confidence": 0.71 + }, + { + "text": "ordinateurs", + "start": 23.04, + "end": 23.54, + "confidence": 0.966 + }, + { + "text": 
"nous", + "start": 23.54, + "end": 23.72, + "confidence": 0.602 + }, + { + "text": "ont", + "start": 23.72, + "end": 23.82, + "confidence": 0.974 + }, + { + "text": "appris", + "start": 23.82, + "end": 24.1, + "confidence": 0.991 + }, + { + "text": "à", + "start": 24.1, + "end": 24.24, + "confidence": 0.828 + }, + { + "text": "cliquer", + "start": 24.24, + "end": 24.5, + "confidence": 0.989 + }, + { + "text": "sur", + "start": 24.5, + "end": 24.66, + "confidence": 0.984 + }, + { + "text": "des", + "start": 24.66, + "end": 24.94, + "confidence": 0.971 + }, + { + "text": "icônes.", + "start": 24.94, + "end": 25.26, + "confidence": 0.992 + } + ] + }, + { + "id": 7, + "seek": 2534, + "start": 25.42, + "end": 30.64, + "text": " Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible.", + "tokens": [ + 318, + 9507, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1769, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07923513396173461, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.4465685985196615e-06, + "confidence": 0.969, + "words": [ + { + "text": "Sauf", + "start": 25.42, + "end": 25.76, + "confidence": 0.99 + }, + { + "text": "que", + "start": 25.76, + "end": 26.26, + "confidence": 0.996 + }, + { + "text": "le", + "start": 26.26, + "end": 26.66, + "confidence": 0.632 + }, + { + "text": "smartphone", + "start": 26.66, + "end": 27.06, + "confidence": 0.996 + }, + { + "text": "ajoute", + "start": 27.06, + "end": 27.44, + "confidence": 0.991 + }, + { + "text": "le", + "start": 27.44, + "end": 27.62, + "confidence": 0.992 + }, + { + "text": "toucher,", + "start": 27.62, + "end": 28.18, + "confidence": 0.988 + }, + { + "text": "ce", + "start": 28.18, + "end": 28.22, + "confidence": 0.989 + }, + { + "text": "qui", + "start": 28.22, + "end": 28.28, + "confidence": 1.0 + }, + { + "text": "rend", + "start": 28.28, + "end": 
28.48, + "confidence": 0.994 + }, + { + "text": "le", + "start": 28.48, + "end": 28.68, + "confidence": 0.993 + }, + { + "text": "contact", + "start": 28.68, + "end": 29.1, + "confidence": 0.999 + }, + { + "text": "plus", + "start": 29.1, + "end": 29.46, + "confidence": 0.985 + }, + { + "text": "direct,", + "start": 29.46, + "end": 30.22, + "confidence": 0.995 + }, + { + "text": "plus", + "start": 30.22, + "end": 30.26, + "confidence": 0.994 + }, + { + "text": "sensible.", + "start": 30.26, + "end": 30.64, + "confidence": 0.997 + } + ] + }, + { + "id": 8, + "seek": 2534, + "start": 31.04, + "end": 37.82, + "text": " Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but.", + "tokens": [ + 3790, + 9093, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 10095, + 602, + 84, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 1609, + 457, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07923513396173461, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.4465685985196615e-06, + "confidence": 0.892, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.22, + "confidence": 0.97 + }, + { + "text": "puis", + "start": 31.22, + "end": 31.36, + "confidence": 0.971 + }, + { + "text": "évidemment,", + "start": 31.36, + "end": 31.7, + "confidence": 0.875 + }, + { + "text": "il", + "start": 31.7, + "end": 31.74, + "confidence": 0.993 + }, + { + "text": "faudrait", + "start": 31.74, + "end": 31.94, + "confidence": 0.995 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.12, + "confidence": 0.839 + }, + { + "text": "aussi", + "start": 32.12, + "end": 32.34, + "confidence": 0.977 + }, + { + "text": "des", + "start": 32.34, + "end": 32.48, + "confidence": 0.994 + }, + { + "text": "applications", + "start": 32.48, + "end": 32.9, + "confidence": 0.993 + }, + 
{ + "text": "qui", + "start": 32.9, + "end": 33.18, + "confidence": 0.482 + }, + { + "text": "permettent", + "start": 33.18, + "end": 33.74, + "confidence": 0.992 + }, + { + "text": "de", + "start": 33.74, + "end": 33.96, + "confidence": 0.884 + }, + { + "text": "contourner", + "start": 33.96, + "end": 34.42, + "confidence": 0.959 + }, + { + "text": "le", + "start": 34.42, + "end": 34.52, + "confidence": 0.777 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.8, + "confidence": 0.984 + }, + { + "text": "touffu", + "start": 34.8, + "end": 35.32, + "confidence": 0.741 + }, + { + "text": "de", + "start": 35.32, + "end": 35.72, + "confidence": 0.882 + }, + { + "text": "la", + "start": 35.72, + "end": 35.78, + "confidence": 0.992 + }, + { + "text": "navigation", + "start": 35.78, + "end": 36.24, + "confidence": 0.994 + }, + { + "text": "web", + "start": 36.24, + "end": 36.6, + "confidence": 0.854 + }, + { + "text": "pour", + "start": 36.6, + "end": 36.78, + "confidence": 0.583 + }, + { + "text": "aller", + "start": 36.78, + "end": 36.98, + "confidence": 0.987 + }, + { + "text": "directement", + "start": 36.98, + "end": 37.52, + "confidence": 0.997 + }, + { + "text": "au", + "start": 37.52, + "end": 37.68, + "confidence": 0.967 + }, + { + "text": "but.", + "start": 37.68, + "end": 37.82, + "confidence": 0.995 + } + ] + }, + { + "id": 9, + "seek": 2534, + "start": 37.82, + "end": 46.58, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 8603, + 14964, + 9400, + 38268, + 6176, + 421, + 6, + 388, + 871, + 6070, + 271, + 443, + 5199, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07923513396173461, + "compression_ratio": 
1.6254416961130742, + "no_speech_prob": 3.4465685985196615e-06, + "confidence": 0.974, + "words": [ + { + "text": "Bref,", + "start": 37.82, + "end": 38.76, + "confidence": 0.987 + }, + { + "text": "tout", + "start": 38.76, + "end": 38.98, + "confidence": 0.711 + }, + { + "text": "ça,", + "start": 38.98, + "end": 39.42, + "confidence": 0.995 + }, + { + "text": "ce", + "start": 39.42, + "end": 39.7, + "confidence": 0.993 + }, + { + "text": "sont", + "start": 39.7, + "end": 39.88, + "confidence": 0.999 + }, + { + "text": "les", + "start": 39.88, + "end": 40.16, + "confidence": 0.991 + }, + { + "text": "conditions", + "start": 40.16, + "end": 40.68, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 40.68, + "end": 40.96, + "confidence": 0.997 + }, + { + "text": "permettent", + "start": 40.96, + "end": 41.46, + "confidence": 0.997 + }, + { + "text": "de", + "start": 41.46, + "end": 41.6, + "confidence": 0.998 + }, + { + "text": "créer", + "start": 41.6, + "end": 42.06, + "confidence": 0.998 + }, + { + "text": "cet", + "start": 42.06, + "end": 42.38, + "confidence": 0.998 + }, + { + "text": "objet", + "start": 42.38, + "end": 42.6, + "confidence": 0.994 + }, + { + "text": "dont", + "start": 42.6, + "end": 42.8, + "confidence": 0.78 + }, + { + "text": "Nicolas", + "start": 42.8, + "end": 43.26, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 43.26, + "end": 43.5, + "confidence": 0.986 + }, + { + "text": "qu'il", + "start": 43.5, + "end": 43.7, + "confidence": 0.983 + }, + { + "text": "est", + "start": 43.7, + "end": 43.88, + "confidence": 0.991 + }, + { + "text": "vraisemblablement", + "start": 43.88, + "end": 44.98, + "confidence": 0.991 + }, + { + "text": "inédit", + "start": 44.98, + "end": 45.38, + "confidence": 0.981 + }, + { + "text": "dans", + "start": 45.38, + "end": 45.7, + "confidence": 0.969 + }, + { + "text": "l'histoire", + "start": 45.7, + "end": 45.98, + "confidence": 0.957 + }, + { + "text": "de", + "start": 45.98, + "end": 46.18, + 
"confidence": 0.999 + }, + { + "text": "l'humanité.", + "start": 46.18, + "end": 46.58, + "confidence": 0.992 + } + ] + }, + { + "id": 10, + "seek": 2534, + "start": 46.6, + "end": 48.82, + "text": " Mais ça, ça soulève une autre interrogation.", + "tokens": [ + 6313, + 2788, + 11, + 2788, + 5133, + 31397, + 2251, + 15081, + 24871, + 399, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07923513396173461, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.4465685985196615e-06, + "confidence": 0.969, + "words": [ + { + "text": "Mais", + "start": 46.6, + "end": 47.24, + "confidence": 0.845 + }, + { + "text": "ça,", + "start": 47.24, + "end": 47.72, + "confidence": 0.934 + }, + { + "text": "ça", + "start": 47.72, + "end": 47.76, + "confidence": 0.977 + }, + { + "text": "soulève", + "start": 47.76, + "end": 47.84, + "confidence": 0.993 + }, + { + "text": "une", + "start": 47.84, + "end": 48.02, + "confidence": 0.998 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.26, + "confidence": 0.999 + }, + { + "text": "interrogation.", + "start": 48.26, + "end": 48.82, + "confidence": 0.997 + } + ] + }, + { + "id": 11, + "seek": 4884, + "start": 49.22, + "end": 55.46, + "text": " Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit?", + "tokens": [ + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 8603, + 14964, + 12703, + 294, + 7811, + 270, + 13716, + 270, + 631, + 10349, + 18018, + 1531, + 8783, + 871, + 6212, + 517, + 18018, + 294, + 7811, + 270, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.10683460109281225, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.2805365258827806e-05, + "confidence": 0.988, + "words": [ + { + "text": "Est-ce", + "start": 49.22, + "end": 49.62, + "confidence": 0.982 + }, + { + "text": "que", + "start": 49.62, + "end": 49.72, + "confidence": 0.991 + }, + { + "text": "le", + "start": 49.72, + "end": 49.82, + "confidence": 0.993 + }, + { + "text": 
"fait", + "start": 49.82, + "end": 49.98, + "confidence": 0.999 + }, + { + "text": "que", + "start": 49.98, + "end": 50.14, + "confidence": 0.991 + }, + { + "text": "cet", + "start": 50.14, + "end": 50.32, + "confidence": 0.991 + }, + { + "text": "objet", + "start": 50.32, + "end": 50.66, + "confidence": 0.997 + }, + { + "text": "soit", + "start": 50.66, + "end": 51.12, + "confidence": 0.995 + }, + { + "text": "inédit", + "start": 51.12, + "end": 51.8, + "confidence": 0.995 + }, + { + "text": "induit", + "start": 51.8, + "end": 52.32, + "confidence": 0.976 + }, + { + "text": "que", + "start": 52.32, + "end": 52.42, + "confidence": 0.983 + }, + { + "text": "notre", + "start": 52.42, + "end": 52.72, + "confidence": 0.996 + }, + { + "text": "rapport", + "start": 52.72, + "end": 53.28, + "confidence": 0.997 + }, + { + "text": "à", + "start": 53.28, + "end": 53.44, + "confidence": 0.978 + }, + { + "text": "lui", + "start": 53.44, + "end": 53.66, + "confidence": 0.999 + }, + { + "text": "est", + "start": 53.66, + "end": 54.02, + "confidence": 0.915 + }, + { + "text": "aussi", + "start": 54.02, + "end": 54.54, + "confidence": 0.995 + }, + { + "text": "un", + "start": 54.54, + "end": 54.7, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 54.7, + "end": 55.0, + "confidence": 0.996 + }, + { + "text": "inédit?", + "start": 55.0, + "end": 55.46, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 4884, + "start": 55.46, + "end": 63.12, + "text": " Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone?", + "tokens": [ + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 13307, + 871, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 1111, + 25349, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506 + ], + "temperature": 0.0, + 
"avg_logprob": -0.10683460109281225, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.2805365258827806e-05, + "confidence": 0.965, + "words": [ + { + "text": "Je", + "start": 55.46, + "end": 55.9, + "confidence": 0.879 + }, + { + "text": "veux", + "start": 55.9, + "end": 56.0, + "confidence": 0.988 + }, + { + "text": "dire,", + "start": 56.0, + "end": 56.24, + "confidence": 0.997 + }, + { + "text": "est-ce", + "start": 56.24, + "end": 56.36, + "confidence": 0.99 + }, + { + "text": "que", + "start": 56.36, + "end": 56.42, + "confidence": 0.99 + }, + { + "text": "le", + "start": 56.42, + "end": 56.58, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 56.58, + "end": 56.88, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 56.88, + "end": 57.04, + "confidence": 0.985 + }, + { + "text": "a", + "start": 57.04, + "end": 57.18, + "confidence": 0.989 + }, + { + "text": "au", + "start": 57.18, + "end": 57.28, + "confidence": 0.967 + }, + { + "text": "smartphone", + "start": 57.28, + "end": 57.6, + "confidence": 0.994 + }, + { + "text": "est", + "start": 57.6, + "end": 57.92, + "confidence": 0.949 + }, + { + "text": "comparable", + "start": 57.92, + "end": 58.24, + "confidence": 0.997 + }, + { + "text": "à", + "start": 58.24, + "end": 58.48, + "confidence": 0.95 + }, + { + "text": "celui", + "start": 58.48, + "end": 58.66, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 58.66, + "end": 58.9, + "confidence": 0.989 + }, + { + "text": "entretenait", + "start": 58.9, + "end": 59.32, + "confidence": 0.929 + }, + { + "text": "à", + "start": 59.32, + "end": 59.46, + "confidence": 0.959 + }, + { + "text": "d'autres", + "start": 59.46, + "end": 59.7, + "confidence": 0.997 + }, + { + "text": "objets", + "start": 59.7, + "end": 59.96, + "confidence": 0.991 + }, + { + "text": "techniques", + "start": 59.96, + "end": 60.46, + "confidence": 0.983 + }, + { + "text": "comme", + "start": 60.46, + "end": 60.88, + "confidence": 0.586 + }, + { 
+ "text": "la", + "start": 60.88, + "end": 61.5, + "confidence": 0.987 + }, + { + "text": "voiture", + "start": 61.5, + "end": 62.06, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 62.06, + "end": 62.36, + "confidence": 0.946 + }, + { + "text": "le", + "start": 62.36, + "end": 62.68, + "confidence": 0.998 + }, + { + "text": "téléphone?", + "start": 62.68, + "end": 63.12, + "confidence": 0.999 + } + ] + }, + { + "id": 13, + "seek": 4884, + "start": 63.36, + "end": 66.66, + "text": " Il n'y a pas d'équivalent en fait.", + "tokens": [ + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 465, + 3887, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10683460109281225, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.2805365258827806e-05, + "confidence": 0.936, + "words": [ + { + "text": "Il", + "start": 63.36, + "end": 65.42, + "confidence": 0.778 + }, + { + "text": "n'y", + "start": 65.42, + "end": 65.48, + "confidence": 0.978 + }, + { + "text": "a", + "start": 65.48, + "end": 65.54, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 65.54, + "end": 65.66, + "confidence": 0.999 + }, + { + "text": "d'équivalent", + "start": 65.66, + "end": 66.22, + "confidence": 0.995 + }, + { + "text": "en", + "start": 66.22, + "end": 66.42, + "confidence": 0.601 + }, + { + "text": "fait.", + "start": 66.42, + "end": 66.66, + "confidence": 0.996 + } + ] + }, + { + "id": 14, + "seek": 4884, + "start": 66.88, + "end": 71.52, + "text": " Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant.", + "tokens": [ + 3790, + 5926, + 5550, + 7089, + 30236, + 368, + 11456, + 1375, + 526, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 11, + 269, + 6, + 377, + 7184, + 259, + 394, + 1030, + 7245, + 351, + 5798, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10683460109281225, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.2805365258827806e-05, + "confidence": 0.953, + 
"words": [ + { + "text": "Et", + "start": 66.88, + "end": 66.98, + "confidence": 0.599 + }, + { + "text": "donc", + "start": 66.98, + "end": 67.08, + "confidence": 0.902 + }, + { + "text": "cette", + "start": 67.08, + "end": 67.28, + "confidence": 0.712 + }, + { + "text": "espèce", + "start": 67.28, + "end": 67.54, + "confidence": 0.996 + }, + { + "text": "de", + "start": 67.54, + "end": 67.68, + "confidence": 0.999 + }, + { + "text": "nouveauté", + "start": 67.68, + "end": 68.48, + "confidence": 0.979 + }, + { + "text": "dans", + "start": 68.48, + "end": 68.66, + "confidence": 0.98 + }, + { + "text": "la", + "start": 68.66, + "end": 68.94, + "confidence": 0.995 + }, + { + "text": "relation", + "start": 68.94, + "end": 69.22, + "confidence": 0.998 + }, + { + "text": "à", + "start": 69.22, + "end": 69.38, + "confidence": 0.997 + }, + { + "text": "l'objet,", + "start": 69.38, + "end": 70.24, + "confidence": 0.997 + }, + { + "text": "c'est", + "start": 70.24, + "end": 70.38, + "confidence": 0.98 + }, + { + "text": "fascinant", + "start": 70.38, + "end": 70.64, + "confidence": 0.978 + }, + { + "text": "et", + "start": 70.64, + "end": 70.76, + "confidence": 0.965 + }, + { + "text": "terrifiant.", + "start": 70.76, + "end": 71.52, + "confidence": 0.977 + } + ] + }, + { + "id": 15, + "seek": 4884, + "start": 71.62, + "end": 76.48, + "text": " Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet,", + "tokens": [ + 20429, + 421, + 6, + 266, + 257, + 287, + 6, + 36107, + 11, + 5173, + 476, + 37313, + 1512, + 33643, + 25929, + 1030, + 1512, + 3328, + 11, + 274, + 6, + 9498, + 45768, + 1719, + 368, + 8603, + 14964, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.10683460109281225, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.2805365258827806e-05, + "confidence": 0.815, + "words": [ + { + "text": "Parce", + "start": 71.62, + "end": 71.86, + "confidence": 0.514 + }, + { + "text": "qu'on", + "start": 
71.86, + "end": 72.12, + "confidence": 0.938 + }, + { + "text": "a", + "start": 72.12, + "end": 72.44, + "confidence": 0.982 + }, + { + "text": "l'impression,", + "start": 72.44, + "end": 73.56, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 73.56, + "end": 73.84, + "confidence": 0.964 + }, + { + "text": "le", + "start": 73.84, + "end": 74.0, + "confidence": 0.984 + }, + { + "text": "disent", + "start": 74.0, + "end": 74.2, + "confidence": 0.998 + }, + { + "text": "les", + "start": 74.2, + "end": 74.4, + "confidence": 0.994 + }, + { + "text": "utilisateurs", + "start": 74.4, + "end": 74.84, + "confidence": 0.997 + }, + { + "text": "et", + "start": 74.84, + "end": 74.96, + "confidence": 0.331 + }, + { + "text": "les", + "start": 74.96, + "end": 75.0, + "confidence": 0.775 + }, + { + "text": "services,", + "start": 75.0, + "end": 75.22, + "confidence": 0.217 + }, + { + "text": "d'être", + "start": 75.22, + "end": 75.42, + "confidence": 0.768 + }, + { + "text": "dépendants", + "start": 75.42, + "end": 75.96, + "confidence": 0.79 + }, + { + "text": "de", + "start": 75.96, + "end": 76.08, + "confidence": 0.985 + }, + { + "text": "cet", + "start": 76.08, + "end": 76.26, + "confidence": 0.996 + }, + { + "text": "objet,", + "start": 76.26, + "end": 76.48, + "confidence": 0.996 + } + ] + }, + { + "id": 16, + "seek": 7684, + "start": 76.86, + "end": 83.26, + "text": " d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet.", + "tokens": [ + 274, + 6, + 471, + 43612, + 465, + 3887, + 2251, + 7089, + 30236, + 368, + 9721, + 11, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 368, + 287, + 6, + 335, + 781, + 374, + 1030, + 1956, + 669, + 18832, + 6212, + 1531, + 730, + 1254, + 279, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06925634075613583, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.771887572336709e-06, + 
"confidence": 0.917, + "words": [ + { + "text": "d'induire", + "start": 76.86, + "end": 77.08, + "confidence": 0.84 + }, + { + "text": "en", + "start": 77.08, + "end": 77.24, + "confidence": 0.617 + }, + { + "text": "fait", + "start": 77.24, + "end": 77.34, + "confidence": 0.994 + }, + { + "text": "une", + "start": 77.34, + "end": 77.52, + "confidence": 0.983 + }, + { + "text": "espèce", + "start": 77.52, + "end": 77.88, + "confidence": 0.996 + }, + { + "text": "de", + "start": 77.88, + "end": 78.48, + "confidence": 0.997 + }, + { + "text": "relation,", + "start": 78.48, + "end": 78.6, + "confidence": 0.601 + }, + { + "text": "de", + "start": 78.6, + "end": 78.94, + "confidence": 0.987 + }, + { + "text": "médiation", + "start": 78.94, + "end": 79.52, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 79.52, + "end": 79.74, + "confidence": 0.967 + }, + { + "text": "le", + "start": 79.74, + "end": 79.92, + "confidence": 0.998 + }, + { + "text": "monde", + "start": 79.92, + "end": 80.64, + "confidence": 0.992 + }, + { + "text": "qui", + "start": 80.64, + "end": 81.1, + "confidence": 0.695 + }, + { + "text": "rend", + "start": 81.1, + "end": 81.64, + "confidence": 0.907 + }, + { + "text": "de", + "start": 81.64, + "end": 81.78, + "confidence": 0.714 + }, + { + "text": "l'ampleur", + "start": 81.78, + "end": 82.02, + "confidence": 0.987 + }, + { + "text": "et", + "start": 82.02, + "end": 82.12, + "confidence": 0.931 + }, + { + "text": "qui", + "start": 82.12, + "end": 82.24, + "confidence": 0.976 + }, + { + "text": "amène", + "start": 82.24, + "end": 82.36, + "confidence": 0.973 + }, + { + "text": "aussi", + "start": 82.36, + "end": 82.56, + "confidence": 0.939 + }, + { + "text": "à", + "start": 82.56, + "end": 82.64, + "confidence": 0.941 + }, + { + "text": "des", + "start": 82.64, + "end": 82.72, + "confidence": 0.992 + }, + { + "text": "formes", + "start": 82.72, + "end": 82.9, + "confidence": 0.993 + }, + { + "text": "de", + "start": 82.9, + "end": 83.02, + 
"confidence": 0.997 + }, + { + "text": "rejet.", + "start": 83.02, + "end": 83.26, + "confidence": 0.884 + } + ] + }, + { + "id": 17, + "seek": 7684, + "start": 83.94, + "end": 87.8, + "text": " Donc, à objet inédit, rapport inédit.", + "tokens": [ + 7477, + 11, + 1531, + 14964, + 294, + 7811, + 270, + 11, + 18018, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06925634075613583, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.771887572336709e-06, + "confidence": 0.962, + "words": [ + { + "text": "Donc,", + "start": 83.94, + "end": 84.94, + "confidence": 0.971 + }, + { + "text": "à", + "start": 84.94, + "end": 84.98, + "confidence": 0.881 + }, + { + "text": "objet", + "start": 84.98, + "end": 85.36, + "confidence": 0.827 + }, + { + "text": "inédit,", + "start": 85.36, + "end": 86.56, + "confidence": 0.993 + }, + { + "text": "rapport", + "start": 86.56, + "end": 87.0, + "confidence": 0.981 + }, + { + "text": "inédit.", + "start": 87.0, + "end": 87.8, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 7684, + "start": 88.02, + "end": 95.14, + "text": " Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet.", + "tokens": [ + 3790, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 21724, + 38268, + 11, + 23139, + 1032, + 578, + 4198, + 22118, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 1030, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06925634075613583, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.771887572336709e-06, + "confidence": 0.972, + "words": [ + { + "text": "Et", + "start": 88.02, + "end": 88.48, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 88.48, + "end": 88.86, + "confidence": 0.975 + }, + { + "text": "rapport,", + "start": 88.86, + "end": 89.28, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.28, + "end": 89.56, + "confidence": 0.999 + }, + { + "text": "j'en", + "start": 
89.56, + "end": 89.84, + "confidence": 0.996 + }, + { + "text": "crois", + "start": 89.84, + "end": 89.88, + "confidence": 0.984 + }, + { + "text": "Nicolas,", + "start": 89.88, + "end": 90.54, + "confidence": 0.683 + }, + { + "text": "serait", + "start": 90.54, + "end": 90.94, + "confidence": 0.887 + }, + { + "text": "caractérisé", + "start": 90.94, + "end": 91.8, + "confidence": 0.993 + }, + { + "text": "par", + "start": 91.8, + "end": 92.12, + "confidence": 0.997 + }, + { + "text": "un", + "start": 92.12, + "end": 92.44, + "confidence": 0.997 + }, + { + "text": "mélange", + "start": 92.44, + "end": 92.98, + "confidence": 0.999 + }, + { + "text": "de", + "start": 92.98, + "end": 93.4, + "confidence": 0.998 + }, + { + "text": "dépendance", + "start": 93.4, + "end": 94.24, + "confidence": 0.952 + }, + { + "text": "et", + "start": 94.24, + "end": 94.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 94.54, + "end": 94.68, + "confidence": 0.999 + }, + { + "text": "rejet.", + "start": 94.68, + "end": 95.14, + "confidence": 0.993 + } + ] + }, + { + "id": 19, + "seek": 7684, + "start": 95.78, + "end": 102.86, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies", + "tokens": [ + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 1111, + 25349, + 7512, + 1030, + 368, + 9580, + 8969, + 313, + 2680, + 3269, + 371, + 530 + ], + "temperature": 0.0, + "avg_logprob": -0.06925634075613583, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.771887572336709e-06, + "confidence": 0.941, + "words": [ + { + "text": "Bon,", + "start": 95.78, + "end": 96.38, + "confidence": 0.793 + }, + { + "text": "en", + "start": 96.38, + "end": 96.52, + "confidence": 0.998 + }, + { + "text": "vrai,", + "start": 96.52, + "end": 97.14, + "confidence": 0.994 + }, + { + "text": "il", + "start": 97.14, 
+ "end": 97.18, + "confidence": 0.998 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.58, + "confidence": 0.997 + }, + { + "text": "remonter", + "start": 97.58, + "end": 98.08, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.08, + "end": 98.58, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.58, + "end": 98.7, + "confidence": 0.767 + }, + { + "text": "finement", + "start": 98.7, + "end": 99.32, + "confidence": 0.849 + }, + { + "text": "toute", + "start": 99.32, + "end": 99.7, + "confidence": 0.984 + }, + { + "text": "l'histoire", + "start": 99.7, + "end": 100.06, + "confidence": 0.997 + }, + { + "text": "des", + "start": 100.06, + "end": 100.24, + "confidence": 0.998 + }, + { + "text": "objets", + "start": 100.24, + "end": 100.48, + "confidence": 0.999 + }, + { + "text": "techniques", + "start": 100.48, + "end": 101.02, + "confidence": 0.984 + }, + { + "text": "et", + "start": 101.02, + "end": 101.48, + "confidence": 0.531 + }, + { + "text": "de", + "start": 101.48, + "end": 101.68, + "confidence": 0.994 + }, + { + "text": "leur", + "start": 101.68, + "end": 101.84, + "confidence": 0.829 + }, + { + "text": "insertion", + "start": 101.84, + "end": 102.32, + "confidence": 0.994 + }, + { + "text": "dans", + "start": 102.32, + "end": 102.48, + "confidence": 0.991 + }, + { + "text": "nos", + "start": 102.48, + "end": 102.66, + "confidence": 0.998 + }, + { + "text": "vies", + "start": 102.66, + "end": 102.86, + "confidence": 0.998 + } + ] + }, + { + "id": 20, + "seek": 7684, + "start": 102.9, + "end": 105.74, + "text": " pour déterminer si ce rapport est totalement inédit.", + "tokens": [ + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06925634075613583, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.771887572336709e-06, + "confidence": 0.981, + "words": [ + { + "text": "pour", + "start": 102.9, + "end": 103.06, + 
"confidence": 0.819 + }, + { + "text": "déterminer", + "start": 103.06, + "end": 103.66, + "confidence": 0.997 + }, + { + "text": "si", + "start": 103.66, + "end": 103.76, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 103.76, + "end": 103.94, + "confidence": 0.997 + }, + { + "text": "rapport", + "start": 103.94, + "end": 104.26, + "confidence": 0.997 + }, + { + "text": "est", + "start": 104.26, + "end": 104.74, + "confidence": 0.998 + }, + { + "text": "totalement", + "start": 104.74, + "end": 105.3, + "confidence": 0.999 + }, + { + "text": "inédit.", + "start": 105.3, + "end": 105.74, + "confidence": 0.999 + } + ] + }, + { + "id": 21, + "seek": 10584, + "start": 106.1, + "end": 109.34, + "text": " Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment.", + "tokens": [ + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 408, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07393737035254909, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.3074043535161763e-05, + "confidence": 0.94, + "words": [ + { + "text": "Mais", + "start": 106.1, + "end": 106.36, + "confidence": 0.947 + }, + { + "text": "j'ai", + "start": 106.36, + "end": 106.92, + "confidence": 0.94 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.36, + "confidence": 0.996 + }, + { + "text": "comme", + "start": 107.36, + "end": 107.56, + "confidence": 0.642 + }, + { + "text": "ça", + "start": 107.56, + "end": 107.82, + "confidence": 0.978 + }, + { + "text": "que", + "start": 107.82, + "end": 107.96, + "confidence": 0.976 + }, + { + "text": "Nicolas", + "start": 107.96, + "end": 108.46, + "confidence": 0.985 + }, + { + "text": "ne", + "start": 108.46, + "end": 108.66, + "confidence": 0.726 + }, + { + "text": "se", + "start": 108.66, + "end": 108.7, + "confidence": 0.991 + }, + { + "text": "trompe", + "start": 108.7, + "end": 108.88, + "confidence": 0.995 + }, + { + "text": 
"pas", + "start": 108.88, + "end": 109.08, + "confidence": 0.999 + }, + { + "text": "vraiment.", + "start": 109.08, + "end": 109.34, + "confidence": 0.991 + } + ] + }, + { + "id": 22, + "seek": 10584, + "start": 109.88, + "end": 114.98, + "text": " Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 8732, + 34081, + 631, + 1506, + 262, + 6000, + 11, + 1930, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07393737035254909, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.3074043535161763e-05, + "confidence": 0.969, + "words": [ + { + "text": "Pour", + "start": 109.88, + "end": 110.08, + "confidence": 0.997 + }, + { + "text": "autant", + "start": 110.08, + "end": 110.24, + "confidence": 1.0 + }, + { + "text": "que", + "start": 110.24, + "end": 110.42, + "confidence": 0.988 + }, + { + "text": "je", + "start": 110.42, + "end": 110.52, + "confidence": 0.998 + }, + { + "text": "sache,", + "start": 110.52, + "end": 111.14, + "confidence": 0.962 + }, + { + "text": "il", + "start": 111.14, + "end": 111.18, + "confidence": 0.997 + }, + { + "text": "y", + "start": 111.18, + "end": 111.32, + "confidence": 0.992 + }, + { + "text": "a", + "start": 111.32, + "end": 111.36, + "confidence": 0.993 + }, + { + "text": "eu", + "start": 111.36, + "end": 111.68, + "confidence": 0.998 + }, + { + "text": "plein", + "start": 111.68, + "end": 111.88, + "confidence": 0.974 + }, + { + "text": "de", + "start": 111.88, + "end": 112.06, + "confidence": 0.997 + }, + { + "text": "discussions", + "start": 112.06, + "end": 112.6, + "confidence": 0.799 + }, + { + "text": "autour", + "start": 112.6, + "end": 112.94, + "confidence": 0.995 + }, + { + "text": "de", + "start": 112.94, + "end": 113.46, + "confidence": 0.997 + }, + { + "text": "la", + "start": 113.46, + "end": 113.52, + "confidence": 0.998 + }, + 
{ + "text": "voiture", + "start": 113.52, + "end": 113.86, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 113.86, + "end": 114.06, + "confidence": 0.765 + }, + { + "text": "même", + "start": 114.06, + "end": 114.44, + "confidence": 0.996 + }, + { + "text": "du", + "start": 114.44, + "end": 114.6, + "confidence": 0.995 + }, + { + "text": "téléphone.", + "start": 114.6, + "end": 114.98, + "confidence": 0.999 + } + ] + }, + { + "id": 23, + "seek": 10584, + "start": 115.34, + "end": 119.84, + "text": " Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre.", + "tokens": [ + 6313, + 635, + 45768, + 719, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 7477, + 476, + 319, + 7108, + 2107, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07393737035254909, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.3074043535161763e-05, + "confidence": 0.972, + "words": [ + { + "text": "Mais", + "start": 115.34, + "end": 115.72, + "confidence": 0.994 + }, + { + "text": "la", + "start": 115.72, + "end": 116.02, + "confidence": 0.936 + }, + { + "text": "dépendance", + "start": 116.02, + "end": 116.4, + "confidence": 0.997 + }, + { + "text": "n'était", + "start": 116.4, + "end": 116.62, + "confidence": 0.994 + }, + { + "text": "pas", + "start": 116.62, + "end": 117.0, + "confidence": 0.998 + }, + { + "text": "du", + "start": 117.0, + "end": 117.16, + "confidence": 0.995 + }, + { + "text": "même", + "start": 117.16, + "end": 117.46, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 117.46, + "end": 117.78, + "confidence": 0.999 + }, + { + "text": "Donc", + "start": 117.78, + "end": 117.98, + "confidence": 0.806 + }, + { + "text": "le", + "start": 117.98, + "end": 118.34, + "confidence": 0.709 + }, + { + "text": "rejet", + "start": 118.34, + "end": 118.62, + "confidence": 0.999 + }, + { + "text": "non", + "start": 118.62, + 
"end": 118.78, + "confidence": 0.974 + }, + { + "text": "plus", + "start": 118.78, + "end": 118.94, + "confidence": 0.995 + }, + { + "text": "n'était", + "start": 118.94, + "end": 119.12, + "confidence": 0.987 + }, + { + "text": "pas", + "start": 119.12, + "end": 119.3, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.3, + "end": 119.38, + "confidence": 0.995 + }, + { + "text": "même", + "start": 119.38, + "end": 119.56, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 119.56, + "end": 119.84, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 10584, + "start": 119.98, + "end": 123.02, + "text": " On peut adorer sa bagnole, en avoir besoin pour plein de choses.", + "tokens": [ + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 1771, + 306, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07393737035254909, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.3074043535161763e-05, + "confidence": 0.991, + "words": [ + { + "text": "On", + "start": 119.98, + "end": 120.18, + "confidence": 0.996 + }, + { + "text": "peut", + "start": 120.18, + "end": 120.38, + "confidence": 0.997 + }, + { + "text": "adorer", + "start": 120.38, + "end": 120.66, + "confidence": 0.99 + }, + { + "text": "sa", + "start": 120.66, + "end": 120.88, + "confidence": 0.985 + }, + { + "text": "bagnole,", + "start": 120.88, + "end": 121.46, + "confidence": 0.984 + }, + { + "text": "en", + "start": 121.46, + "end": 121.56, + "confidence": 0.989 + }, + { + "text": "avoir", + "start": 121.56, + "end": 121.74, + "confidence": 0.998 + }, + { + "text": "besoin", + "start": 121.74, + "end": 122.1, + "confidence": 0.999 + }, + { + "text": "pour", + "start": 122.1, + "end": 122.34, + "confidence": 0.987 + }, + { + "text": "plein", + "start": 122.34, + "end": 122.68, + "confidence": 0.989 + }, + { + "text": "de", + "start": 122.68, + "end": 122.8, + "confidence": 0.998 + }, + { + "text": "choses.", 
+ "start": 122.8, + "end": 123.02, + "confidence": 0.989 + } + ] + }, + { + "id": 25, + "seek": 10584, + "start": 123.28, + "end": 126.36, + "text": " Et bien, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 3790, + 3610, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07393737035254909, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.3074043535161763e-05, + "confidence": 0.895, + "words": [ + { + "text": "Et", + "start": 123.28, + "end": 123.46, + "confidence": 0.666 + }, + { + "text": "bien,", + "start": 123.46, + "end": 123.86, + "confidence": 0.47 + }, + { + "text": "le", + "start": 123.86, + "end": 123.98, + "confidence": 0.996 + }, + { + "text": "soir,", + "start": 123.98, + "end": 124.68, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 124.68, + "end": 124.9, + "confidence": 0.997 + }, + { + "text": "on", + "start": 124.9, + "end": 125.02, + "confidence": 0.998 + }, + { + "text": "va", + "start": 125.02, + "end": 125.14, + "confidence": 0.996 + }, + { + "text": "se", + "start": 125.14, + "end": 125.38, + "confidence": 0.988 + }, + { + "text": "coucher,", + "start": 125.38, + "end": 125.8, + "confidence": 0.987 + }, + { + "text": "on", + "start": 125.8, + "end": 126.02, + "confidence": 0.995 + }, + { + "text": "la", + "start": 126.02, + "end": 126.22, + "confidence": 0.801 + }, + { + "text": "laisse.", + "start": 126.22, + "end": 126.36, + "confidence": 0.999 + } + ] + }, + { + "id": 26, + "seek": 10584, + "start": 126.98, + "end": 130.48, + "text": " On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes.", + "tokens": [ + 1282, + 408, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 6932, + 322, + 871, + 1609, + 7997, + 11, + 322, + 408, + 287, + 6, + 443, + 76, + 18832, + 1736, + 1609, + 13228, + 1521, + 279, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07393737035254909, + 
"compression_ratio": 1.685512367491166, + "no_speech_prob": 5.3074043535161763e-05, + "confidence": 0.914, + "words": [ + { + "text": "On", + "start": 126.98, + "end": 127.32, + "confidence": 0.954 + }, + { + "text": "ne", + "start": 127.32, + "end": 127.36, + "confidence": 0.802 + }, + { + "text": "l'a", + "start": 127.36, + "end": 127.48, + "confidence": 0.974 + }, + { + "text": "pas", + "start": 127.48, + "end": 127.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 127.68, + "end": 127.8, + "confidence": 0.996 + }, + { + "text": "la", + "start": 127.8, + "end": 128.06, + "confidence": 0.994 + }, + { + "text": "main", + "start": 128.06, + "end": 128.26, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 128.26, + "end": 128.44, + "confidence": 0.934 + }, + { + "text": "on", + "start": 128.44, + "end": 128.62, + "confidence": 0.997 + }, + { + "text": "est", + "start": 128.62, + "end": 128.68, + "confidence": 0.993 + }, + { + "text": "au", + "start": 128.68, + "end": 129.04, + "confidence": 0.984 + }, + { + "text": "lit,", + "start": 129.04, + "end": 129.14, + "confidence": 0.999 + }, + { + "text": "on", + "start": 129.14, + "end": 129.26, + "confidence": 0.427 + }, + { + "text": "ne", + "start": 129.26, + "end": 129.3, + "confidence": 0.963 + }, + { + "text": "l'emmène", + "start": 129.3, + "end": 129.5, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 129.5, + "end": 129.68, + "confidence": 0.997 + }, + { + "text": "au", + "start": 129.68, + "end": 129.86, + "confidence": 0.668 + }, + { + "text": "chiottes.", + "start": 129.86, + "end": 130.48, + "confidence": 0.829 + } + ] + }, + { + "id": 27, + "seek": 13084, + "start": 130.86, + "end": 136.9, + "text": " On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain.", + "tokens": [ + 1282, + 45913, + 7418, + 45045, + 15797, + 971, + 1872, + 275, + 2851, + 1398, + 1956, + 8073, + 1001, + 635, + 34207, + 368, + 
47159, + 17338, + 2251, + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06175626696962299, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.5689402061980218e-05, + "confidence": 0.955, + "words": [ + { + "text": "On", + "start": 130.86, + "end": 131.04, + "confidence": 0.983 + }, + { + "text": "pouvait", + "start": 131.04, + "end": 131.28, + "confidence": 0.989 + }, + { + "text": "être", + "start": 131.28, + "end": 131.48, + "confidence": 0.996 + }, + { + "text": "énervé", + "start": 131.48, + "end": 132.22, + "confidence": 0.906 + }, + { + "text": "par", + "start": 132.22, + "end": 132.44, + "confidence": 0.991 + }, + { + "text": "son", + "start": 132.44, + "end": 132.7, + "confidence": 0.998 + }, + { + "text": "môme", + "start": 132.7, + "end": 133.1, + "confidence": 0.832 + }, + { + "text": "qui", + "start": 133.1, + "end": 133.34, + "confidence": 0.919 + }, + { + "text": "occupait", + "start": 133.34, + "end": 133.76, + "confidence": 0.991 + }, + { + "text": "la", + "start": 133.76, + "end": 133.8, + "confidence": 0.992 + }, + { + "text": "ligne", + "start": 133.8, + "end": 134.08, + "confidence": 0.999 + }, + { + "text": "de", + "start": 134.08, + "end": 134.14, + "confidence": 0.997 + }, + { + "text": "téléphone", + "start": 134.14, + "end": 134.6, + "confidence": 0.985 + }, + { + "text": "pendant", + "start": 134.6, + "end": 134.82, + "confidence": 0.98 + }, + { + "text": "une", + "start": 134.82, + "end": 135.2, + "confidence": 0.87 + }, + { + "text": "heure", + "start": 135.2, + "end": 135.36, + "confidence": 0.998 + }, + { + "text": "chaque", + "start": 135.36, + "end": 135.54, + "confidence": 0.991 + }, + { + "text": "soir", + "start": 135.54, + "end": 135.8, + "confidence": 0.996 + }, + { + "text": "pour", + "start": 135.8, + "end": 135.96, + "confidence": 0.791 + }, + { + "text": "discuter", + "start": 135.96, + "end": 136.28, + "confidence": 0.997 + 
}, + { + "text": "avec", + "start": 136.28, + "end": 136.48, + "confidence": 0.996 + }, + { + "text": "un", + "start": 136.48, + "end": 136.6, + "confidence": 0.999 + }, + { + "text": "copain.", + "start": 136.6, + "end": 136.9, + "confidence": 0.998 + } + ] + }, + { + "id": 28, + "seek": 13084, + "start": 137.28, + "end": 141.88, + "text": " Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui,", + "tokens": [ + 6313, + 2788, + 408, + 725, + 15750, + 35235, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 1769, + 5698, + 275, + 2851, + 1398, + 14023, + 6, + 10556, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06175626696962299, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.5689402061980218e-05, + "confidence": 0.958, + "words": [ + { + "text": "Mais", + "start": 137.28, + "end": 137.46, + "confidence": 0.993 + }, + { + "text": "ça", + "start": 137.46, + "end": 137.68, + "confidence": 0.938 + }, + { + "text": "ne", + "start": 137.68, + "end": 137.94, + "confidence": 0.998 + }, + { + "text": "ressemblait", + "start": 137.94, + "end": 138.4, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 138.4, + "end": 138.76, + "confidence": 0.995 + }, + { + "text": "à", + "start": 138.76, + "end": 138.94, + "confidence": 0.988 + }, + { + "text": "ce", + "start": 138.94, + "end": 138.98, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 138.98, + "end": 139.1, + "confidence": 0.986 + }, + { + "text": "peut", + "start": 139.1, + "end": 139.48, + "confidence": 0.988 + }, + { + "text": "ressentir", + "start": 139.48, + "end": 140.12, + "confidence": 0.997 + }, + { + "text": "à", + "start": 140.12, + "end": 140.32, + "confidence": 0.498 + }, + { + "text": "voir", + "start": 140.32, + "end": 140.46, + "confidence": 0.852 + }, + { + "text": "ce", + "start": 140.46, + "end": 140.68, + "confidence": 0.989 + }, + { + "text": "même", + "start": 140.68, + "end": 140.92, 
+ "confidence": 0.984 + }, + { + "text": "môme", + "start": 140.92, + "end": 141.34, + "confidence": 0.998 + }, + { + "text": "aujourd'hui,", + "start": 141.34, + "end": 141.88, + "confidence": 0.988 + } + ] + }, + { + "id": 29, + "seek": 13084, + "start": 142.14, + "end": 146.3, + "text": " continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe,", + "tokens": [ + 2354, + 285, + 1712, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 1511, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 15165, + 49523, + 454, + 391, + 716, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06175626696962299, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.5689402061980218e-05, + "confidence": 0.963, + "words": [ + { + "text": "continuellement", + "start": 142.14, + "end": 142.94, + "confidence": 0.971 + }, + { + "text": "avec", + "start": 142.94, + "end": 143.18, + "confidence": 0.806 + }, + { + "text": "son", + "start": 143.18, + "end": 143.38, + "confidence": 0.995 + }, + { + "text": "smartphone", + "start": 143.38, + "end": 143.76, + "confidence": 0.978 + }, + { + "text": "dans", + "start": 143.76, + "end": 143.94, + "confidence": 0.979 + }, + { + "text": "la", + "start": 143.94, + "end": 144.0, + "confidence": 0.996 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.36, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 144.36, + "end": 144.52, + "confidence": 0.836 + }, + { + "text": "si", + "start": 144.52, + "end": 144.64, + "confidence": 0.975 + }, + { + "text": "c'était", + "start": 144.64, + "end": 144.82, + "confidence": 0.991 + }, + { + "text": "une", + "start": 144.82, + "end": 145.06, + "confidence": 0.989 + }, + { + "text": "sorte", + "start": 145.06, + "end": 145.22, + "confidence": 0.997 + }, + { + "text": "de", + "start": 145.22, + "end": 145.3, + "confidence": 0.982 + }, + { + "text": "pacemaker", + "start": 145.3, + "end": 145.82, + "confidence": 0.917 + }, + { + "text": 
"externe,", + "start": 145.82, + "end": 146.3, + "confidence": 0.992 + } + ] + }, + { + "id": 30, + "seek": 13084, + "start": 146.34, + "end": 148.84, + "text": " comme si le lâcher allait entraîner sa mort immédiate.", + "tokens": [ + 5173, + 1511, + 476, + 48835, + 6759, + 439, + 1001, + 22284, + 7517, + 1193, + 601, + 6599, + 3397, + 526, + 4504, + 473, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06175626696962299, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.5689402061980218e-05, + "confidence": 0.989, + "words": [ + { + "text": "comme", + "start": 146.34, + "end": 146.58, + "confidence": 0.996 + }, + { + "text": "si", + "start": 146.58, + "end": 146.76, + "confidence": 0.994 + }, + { + "text": "le", + "start": 146.76, + "end": 146.86, + "confidence": 0.997 + }, + { + "text": "lâcher", + "start": 146.86, + "end": 147.36, + "confidence": 0.969 + }, + { + "text": "allait", + "start": 147.36, + "end": 147.56, + "confidence": 0.993 + }, + { + "text": "entraîner", + "start": 147.56, + "end": 147.86, + "confidence": 0.978 + }, + { + "text": "sa", + "start": 147.86, + "end": 148.0, + "confidence": 0.999 + }, + { + "text": "mort", + "start": 148.0, + "end": 148.22, + "confidence": 0.998 + }, + { + "text": "immédiate.", + "start": 148.22, + "end": 148.84, + "confidence": 0.997 + } + ] + }, + { + "id": 31, + "seek": 13084, + "start": 149.04, + "end": 151.96, + "text": " Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi.", + "tokens": [ + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 2851, + 1398, + 11, + 2420, + 269, + 6, + 377, + 24724, + 1323, + 712, + 2016, + 4666, + 6212, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06175626696962299, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.5689402061980218e-05, + "confidence": 0.974, + "words": [ + { + "text": "Bon,", + "start": 149.04, + "end": 149.28, + "confidence": 0.917 + }, + { + "text": "je", + "start": 149.28, + "end": 149.32, + 
"confidence": 0.935 + }, + { + "text": "dis", + "start": 149.32, + "end": 149.46, + "confidence": 0.988 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.994 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.74, + "confidence": 0.997 + }, + { + "text": "le", + "start": 149.74, + "end": 149.88, + "confidence": 0.995 + }, + { + "text": "môme,", + "start": 149.88, + "end": 150.32, + "confidence": 0.998 + }, + { + "text": "mais", + "start": 150.32, + "end": 150.52, + "confidence": 0.793 + }, + { + "text": "c'est", + "start": 150.52, + "end": 150.82, + "confidence": 0.982 + }, + { + "text": "évidemment", + "start": 150.82, + "end": 151.14, + "confidence": 0.98 + }, + { + "text": "valable", + "start": 151.14, + "end": 151.48, + "confidence": 0.998 + }, + { + "text": "pour", + "start": 151.48, + "end": 151.62, + "confidence": 0.997 + }, + { + "text": "nous", + "start": 151.62, + "end": 151.76, + "confidence": 0.999 + }, + { + "text": "aussi.", + "start": 151.76, + "end": 151.96, + "confidence": 0.996 + } + ] + }, + { + "id": 32, + "seek": 13084, + "start": 152.34, + "end": 158.22, + "text": " Donc, rapport inédit. D'accord. 
Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais?", + "tokens": [ + 7477, + 11, + 18018, + 294, + 7811, + 270, + 13, + 413, + 6, + 19947, + 13, + 6313, + 19934, + 257, + 12, + 83, + 12, + 266, + 287, + 6, + 36107, + 421, + 6, + 266, + 297, + 6, + 268, + 1333, + 4271, + 14540, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.06175626696962299, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.5689402061980218e-05, + "confidence": 0.948, + "words": [ + { + "text": "Donc,", + "start": 152.34, + "end": 153.46, + "confidence": 0.991 + }, + { + "text": "rapport", + "start": 153.46, + "end": 153.66, + "confidence": 0.977 + }, + { + "text": "inédit.", + "start": 153.66, + "end": 154.24, + "confidence": 0.997 + }, + { + "text": "D'accord.", + "start": 154.24, + "end": 155.48, + "confidence": 0.979 + }, + { + "text": "Mais", + "start": 155.48, + "end": 155.82, + "confidence": 0.56 + }, + { + "text": "pourquoi", + "start": 155.82, + "end": 156.32, + "confidence": 0.994 + }, + { + "text": "a-t-on", + "start": 156.32, + "end": 156.68, + "confidence": 0.94 + }, + { + "text": "l'impression", + "start": 156.68, + "end": 157.06, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.26, + "confidence": 0.995 + }, + { + "text": "n'en", + "start": 157.26, + "end": 157.44, + "confidence": 0.878 + }, + { + "text": "sortira", + "start": 157.44, + "end": 157.9, + "confidence": 0.989 + }, + { + "text": "jamais?", + "start": 157.9, + "end": 158.22, + "confidence": 0.997 + } + ] + }, + { + "id": 33, + "seek": 15884, + "start": 158.86, + "end": 165.32, + "text": " Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux?", + "tokens": [ + 4410, + 12, + 384, + 421, + 6, + 388, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 8603, + 484, + 388, + 3551, + 303, + 3409, + 2449, + 1030, + 1026, + 
14923, + 1925, + 11, + 1030, + 1026, + 14923, + 1925, + 6992, + 631, + 3551, + 303, + 3409, + 2449, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.07952147990733653, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.475811485666782e-05, + "confidence": 0.962, + "words": [ + { + "text": "Est-ce", + "start": 158.86, + "end": 159.34, + "confidence": 0.981 + }, + { + "text": "qu'il", + "start": 159.34, + "end": 159.46, + "confidence": 0.997 + }, + { + "text": "faut", + "start": 159.46, + "end": 159.62, + "confidence": 0.999 + }, + { + "text": "en", + "start": 159.62, + "end": 159.78, + "confidence": 0.97 + }, + { + "text": "remettre", + "start": 159.78, + "end": 160.12, + "confidence": 0.999 + }, + { + "text": "la", + "start": 160.12, + "end": 160.34, + "confidence": 0.998 + }, + { + "text": "faute", + "start": 160.34, + "end": 160.66, + "confidence": 0.986 + }, + { + "text": "sur", + "start": 160.66, + "end": 160.94, + "confidence": 0.995 + }, + { + "text": "les", + "start": 160.94, + "end": 161.28, + "confidence": 0.995 + }, + { + "text": "gens", + "start": 161.28, + "end": 161.44, + "confidence": 1.0 + }, + { + "text": "qui", + "start": 161.44, + "end": 161.58, + "confidence": 0.984 + }, + { + "text": "ont", + "start": 161.58, + "end": 161.72, + "confidence": 0.998 + }, + { + "text": "créé", + "start": 161.72, + "end": 162.3, + "confidence": 0.99 + }, + { + "text": "cet", + "start": 162.3, + "end": 162.46, + "confidence": 0.852 + }, + { + "text": "outil", + "start": 162.46, + "end": 162.78, + "confidence": 0.99 + }, + { + "text": "merveilleux", + "start": 162.78, + "end": 163.34, + "confidence": 0.994 + }, + { + "text": "et", + "start": 163.34, + "end": 163.5, + "confidence": 0.954 + }, + { + "text": "diabolique,", + "start": 163.5, + "end": 163.86, + "confidence": 0.992 + }, + { + "text": "et", + "start": 163.86, + "end": 163.92, + "confidence": 0.539 + }, + { + "text": "diabolique", + "start": 163.92, + "end": 164.4, + "confidence": 0.951 + 
}, + { + "text": "parce", + "start": 164.4, + "end": 164.66, + "confidence": 0.704 + }, + { + "text": "que", + "start": 164.66, + "end": 164.84, + "confidence": 0.99 + }, + { + "text": "merveilleux?", + "start": 164.84, + "end": 165.32, + "confidence": 0.997 + } + ] + }, + { + "id": 34, + "seek": 15884, + "start": 166.34, + "end": 168.82, + "text": " Les économistes parlent de dépendance du sentier.", + "tokens": [ + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 2279, + 811, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07952147990733653, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.475811485666782e-05, + "confidence": 0.984, + "words": [ + { + "text": "Les", + "start": 166.34, + "end": 167.04, + "confidence": 0.925 + }, + { + "text": "économistes", + "start": 167.04, + "end": 167.48, + "confidence": 0.998 + }, + { + "text": "parlent", + "start": 167.48, + "end": 167.68, + "confidence": 0.995 + }, + { + "text": "de", + "start": 167.68, + "end": 167.82, + "confidence": 0.995 + }, + { + "text": "dépendance", + "start": 167.82, + "end": 168.36, + "confidence": 0.985 + }, + { + "text": "du", + "start": 168.36, + "end": 168.5, + "confidence": 0.997 + }, + { + "text": "sentier.", + "start": 168.5, + "end": 168.82, + "confidence": 0.978 + } + ] + }, + { + "id": 35, + "seek": 16884, + "start": 168.86, + "end": 177.42, + "text": " C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "tokens": [ + 50364, + 383, + 6, + 377, + 287, + 6, + 34281, + 421, + 6, + 266, + 871, + 1022, + 517, + 2279, + 811, + 1956, + 257, + 8862, + 4823, + 455, + 2081, + 11, + 12703, + 40005, + 9020, + 518, + 465, + 8368, + 394, + 30677, + 11, + 12703, + 465, + 40763, + 29492, + 730, + 4232, + 279, + 11, + 465, + 40763, + 29492, + 2251, + 6358, + 42379, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.10392295320828755, + 
"compression_ratio": 1.3916666666666666, + "no_speech_prob": 3.9441460103262216e-05, + "confidence": 0.93, + "words": [ + { + "text": "C'est", + "start": 168.86, + "end": 169.1, + "confidence": 0.952 + }, + { + "text": "l'idée", + "start": 169.1, + "end": 169.34, + "confidence": 0.992 + }, + { + "text": "qu'on", + "start": 169.34, + "end": 169.7, + "confidence": 0.824 + }, + { + "text": "est", + "start": 169.7, + "end": 169.88, + "confidence": 0.631 + }, + { + "text": "sur", + "start": 169.88, + "end": 170.0, + "confidence": 0.993 + }, + { + "text": "un", + "start": 170.0, + "end": 170.54, + "confidence": 0.998 + }, + { + "text": "sentier", + "start": 170.54, + "end": 170.78, + "confidence": 0.99 + }, + { + "text": "qui", + "start": 170.78, + "end": 170.84, + "confidence": 0.962 + }, + { + "text": "a", + "start": 170.84, + "end": 170.94, + "confidence": 0.98 + }, + { + "text": "été", + "start": 170.94, + "end": 171.12, + "confidence": 0.996 + }, + { + "text": "établi,", + "start": 171.12, + "end": 171.9, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 171.9, + "end": 172.12, + "confidence": 0.988 + }, + { + "text": "volontairement", + "start": 172.12, + "end": 172.72, + "confidence": 0.965 + }, + { + "text": "en", + "start": 172.72, + "end": 172.8, + "confidence": 0.941 + }, + { + "text": "marchant", + "start": 172.8, + "end": 173.06, + "confidence": 0.997 + }, + { + "text": "dessus,", + "start": 173.06, + "end": 174.24, + "confidence": 0.972 + }, + { + "text": "soit", + "start": 174.24, + "end": 174.92, + "confidence": 0.996 + }, + { + "text": "en", + "start": 174.92, + "end": 175.36, + "confidence": 0.991 + }, + { + "text": "définissant", + "start": 175.36, + "end": 175.5, + "confidence": 0.983 + }, + { + "text": "des", + "start": 175.5, + "end": 175.76, + "confidence": 0.984 + }, + { + "text": "bornes,", + "start": 175.76, + "end": 176.04, + "confidence": 0.975 + }, + { + "text": "en", + "start": 176.04, + "end": 176.08, + "confidence": 0.683 + }, + 
{ + "text": "définissant", + "start": 176.08, + "end": 176.58, + "confidence": 0.997 + }, + { + "text": "une", + "start": 176.58, + "end": 176.82, + "confidence": 0.944 + }, + { + "text": "signalétique.", + "start": 176.82, + "end": 177.42, + "confidence": 0.643 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/data/words.wav b/tests/data/words.wav new file mode 100644 index 0000000000000000000000000000000000000000..62f594013e3f01d28b73172dfa013e1180ff2ba9 Binary files /dev/null and b/tests/data/words.wav differ diff --git a/tests/data/yes_punctuations.mp3.words.json b/tests/data/yes_punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..64ed44959c984ee2b8ee34cc27b8924b62bf0ee4 --- /dev/null +++ b/tests/data/yes_punctuations.mp3.words.json @@ -0,0 +1,68 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.4, + "end": 2.34, + "text": " Dis-moi, est-ce que l'avion vole?", + "tokens": [ + 50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 6, + 706, + 313, + 49877, + 2506, + 50494 + ], + "temperature": 0.0, + "avg_logprob": -0.3014036907869227, + "compression_ratio": 0.8048780487804879, + "no_speech_prob": 0.05134102329611778, + "confidence": 0.925, + "words": [ + { + "text": "Dis-moi,", + "start": 0.4, + "end": 1.36, + "confidence": 0.801 + }, + { + "text": "est-ce", + "start": 1.36, + "end": 1.52, + "confidence": 0.966 + }, + { + "text": "que", + "start": 1.52, + "end": 1.64, + "confidence": 0.977 + }, + { + "text": "l'avion", + "start": 1.64, + "end": 2.02, + "confidence": 0.994 + }, + { + "text": "vole?", + "start": 2.02, + "end": 2.34, + "confidence": 0.891 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/accurate.tiny_apollo11.mp3.words.json b/tests/expected/corner_cases.cpu/accurate.tiny_apollo11.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..b8cef4748a2e67b9548f033c7facf02873fcab99 --- /dev/null +++ b/tests/expected/corner_cases.cpu/accurate.tiny_apollo11.mp3.words.json @@ -0,0 +1,1481 @@ +{ + "text": " I'm all around my business and we gotta eat recommendations for you on your door to the VA We have 18A's I'm all around my business and I'm all around my business and I'm all around my business I got a little bit of a problem with that, and you want to have the advice down My goal is to reach these things, not to reach out to them and they're out of the bag And I can't see anything else, I don't want to hear anything else I'm at the least, I'm not making the tag I can't see anything else Yeah, I'm taking a heavy out of the bag We're on the highway, we're on the highway, we're on the highway We're on the highway, we're on the highway We want to hear that, this is the case, we know we can't get that much of our business So I guess we'll be somewhere in our place, but it's not over Okay, it's not over, it's not over, it's not over", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.6, + "end": 5.28, + "text": " I'm all around my business and we gotta eat recommendations for you on your door to the VA", + "tokens": [ + 286, + 478, + 439, + 926, + 452, + 1606, + 293, + 321, + 3428, + 1862, + 10434, + 337, + 291, + 322, + 428, + 2853, + 281, + 264, + 18527 + ], + "temperature": 0.0, + "avg_logprob": -0.4247800021701389, + "compression_ratio": 6.281818181818182, + "no_speech_prob": 0.3518604338169098, + "confidence": 0.27, + "words": [ + { + "text": "I'm", + "start": 0.6, + "end": 0.74, + "confidence": 0.168 + }, + { + "text": "all", + "start": 0.74, + "end": 1.02, + "confidence": 0.083 + }, + { + "text": "around", + "start": 1.02, + "end": 1.24, + "confidence": 0.103 + }, + { + "text": "my", + "start": 1.24, + "end": 1.42, + "confidence": 0.129 + }, + { + "text": "business", + "start": 1.42, + "end": 1.62, + "confidence": 0.339 + }, + { + "text": "and", + 
"start": 1.62, + "end": 1.9, + "confidence": 0.613 + }, + { + "text": "we", + "start": 1.9, + "end": 1.98, + "confidence": 0.938 + }, + { + "text": "gotta", + "start": 1.98, + "end": 2.16, + "confidence": 0.402 + }, + { + "text": "eat", + "start": 2.16, + "end": 2.38, + "confidence": 0.246 + }, + { + "text": "recommendations", + "start": 2.38, + "end": 2.84, + "confidence": 0.111 + }, + { + "text": "for", + "start": 2.84, + "end": 3.48, + "confidence": 0.236 + }, + { + "text": "you", + "start": 3.48, + "end": 3.62, + "confidence": 0.95 + }, + { + "text": "on", + "start": 3.62, + "end": 3.82, + "confidence": 0.774 + }, + { + "text": "your", + "start": 3.82, + "end": 4.36, + "confidence": 0.598 + }, + { + "text": "door", + "start": 4.36, + "end": 4.62, + "confidence": 0.113 + }, + { + "text": "to", + "start": 4.62, + "end": 4.84, + "confidence": 0.194 + }, + { + "text": "the", + "start": 4.84, + "end": 4.9, + "confidence": 0.457 + }, + { + "text": "VA", + "start": 4.9, + "end": 5.28, + "confidence": 0.23 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 5.28, + "end": 6.82, + "text": " We have 18A's", + "tokens": [ + 492, + 362, + 2443, + 32, + 311 + ], + "temperature": 0.0, + "avg_logprob": -0.4247800021701389, + "compression_ratio": 6.281818181818182, + "no_speech_prob": 0.3518604338169098, + "confidence": 0.189, + "words": [ + { + "text": "We", + "start": 5.28, + "end": 5.6, + "confidence": 0.056 + }, + { + "text": "have", + "start": 5.6, + "end": 5.72, + "confidence": 0.61 + }, + { + "text": "18A's", + "start": 5.72, + "end": 6.82, + "confidence": 0.191 + } + ] + }, + { + "id": 2, + "seek": 700, + "start": 7.02, + "end": 24.14, + "text": " I'm all around my business and I'm all around my business and I'm all around my business", + "tokens": [ + 286, + 478, + 439, + 926, + 452, + 1606, + 293, + 286, + 478, + 439, + 926, + 452, + 1606, + 293, + 286, + 478, + 439, + 926, + 452, + 1606 + ], + "temperature": 0.0, + "avg_logprob": -0.8006511529286703, + 
"compression_ratio": 2.1463414634146343, + "no_speech_prob": 0.1812065988779068, + "confidence": 0.476, + "words": [ + { + "text": "I'm", + "start": 7.02, + "end": 11.1, + "confidence": 0.124 + }, + { + "text": "all", + "start": 11.1, + "end": 13.62, + "confidence": 0.143 + }, + { + "text": "around", + "start": 13.62, + "end": 13.66, + "confidence": 0.517 + }, + { + "text": "my", + "start": 13.66, + "end": 13.7, + "confidence": 0.294 + }, + { + "text": "business", + "start": 13.7, + "end": 13.74, + "confidence": 0.931 + }, + { + "text": "and", + "start": 13.74, + "end": 14.08, + "confidence": 0.249 + }, + { + "text": "I'm", + "start": 14.08, + "end": 14.48, + "confidence": 0.379 + }, + { + "text": "all", + "start": 14.48, + "end": 14.68, + "confidence": 0.368 + }, + { + "text": "around", + "start": 14.68, + "end": 14.72, + "confidence": 0.892 + }, + { + "text": "my", + "start": 14.72, + "end": 14.76, + "confidence": 0.743 + }, + { + "text": "business", + "start": 14.76, + "end": 14.8, + "confidence": 0.981 + }, + { + "text": "and", + "start": 14.8, + "end": 15.04, + "confidence": 0.391 + }, + { + "text": "I'm", + "start": 15.04, + "end": 16.06, + "confidence": 0.861 + }, + { + "text": "all", + "start": 16.06, + "end": 16.12, + "confidence": 0.674 + }, + { + "text": "around", + "start": 16.12, + "end": 16.8, + "confidence": 0.932 + }, + { + "text": "my", + "start": 16.8, + "end": 21.82, + "confidence": 0.765 + }, + { + "text": "business", + "start": 21.82, + "end": 24.14, + "confidence": 0.977 + } + ] + }, + { + "id": 3, + "seek": 3100, + "start": 31.24, + "end": 36.52, + "text": " I got a little bit of a problem with that, and you want to have the advice down", + "tokens": [ + 286, + 658, + 257, + 707, + 857, + 295, + 257, + 1154, + 365, + 300, + 11, + 293, + 291, + 528, + 281, + 362, + 264, + 5192, + 760 + ], + "temperature": 0.0, + "avg_logprob": -1.0431414517489346, + "compression_ratio": 1.8074866310160427, + "no_speech_prob": 0.09527108818292618, + 
"confidence": 0.336, + "words": [ + { + "text": "I", + "start": 31.24, + "end": 31.66, + "confidence": 0.447 + }, + { + "text": "got", + "start": 31.66, + "end": 31.96, + "confidence": 0.219 + }, + { + "text": "a", + "start": 31.96, + "end": 33.32, + "confidence": 0.159 + }, + { + "text": "little", + "start": 33.32, + "end": 33.52, + "confidence": 0.104 + }, + { + "text": "bit", + "start": 33.52, + "end": 33.74, + "confidence": 0.919 + }, + { + "text": "of", + "start": 33.74, + "end": 33.86, + "confidence": 0.903 + }, + { + "text": "a", + "start": 33.86, + "end": 33.94, + "confidence": 0.347 + }, + { + "text": "problem", + "start": 33.94, + "end": 34.12, + "confidence": 0.226 + }, + { + "text": "with", + "start": 34.12, + "end": 34.38, + "confidence": 0.395 + }, + { + "text": "that,", + "start": 34.38, + "end": 34.52, + "confidence": 0.175 + }, + { + "text": "and", + "start": 34.52, + "end": 34.58, + "confidence": 0.331 + }, + { + "text": "you", + "start": 34.58, + "end": 34.7, + "confidence": 0.511 + }, + { + "text": "want", + "start": 34.7, + "end": 35.0, + "confidence": 0.533 + }, + { + "text": "to", + "start": 35.0, + "end": 35.06, + "confidence": 0.568 + }, + { + "text": "have", + "start": 35.06, + "end": 35.36, + "confidence": 0.857 + }, + { + "text": "the", + "start": 35.36, + "end": 35.52, + "confidence": 0.519 + }, + { + "text": "advice", + "start": 35.52, + "end": 35.88, + "confidence": 0.077 + }, + { + "text": "down", + "start": 35.88, + "end": 36.52, + "confidence": 0.231 + } + ] + }, + { + "id": 4, + "seek": 3100, + "start": 37.68, + "end": 41.92, + "text": " My goal is to reach these things, not to reach out to them and they're out of the bag", + "tokens": [ + 1222, + 3387, + 307, + 281, + 2524, + 613, + 721, + 11, + 406, + 281, + 2524, + 484, + 281, + 552, + 293, + 436, + 434, + 484, + 295, + 264, + 3411 + ], + "temperature": 0.0, + "avg_logprob": -1.0431414517489346, + "compression_ratio": 1.8074866310160427, + "no_speech_prob": 0.09527108818292618, 
+ "confidence": 0.312, + "words": [ + { + "text": "My", + "start": 37.68, + "end": 38.04, + "confidence": 0.443 + }, + { + "text": "goal", + "start": 38.04, + "end": 38.14, + "confidence": 0.106 + }, + { + "text": "is", + "start": 38.14, + "end": 38.34, + "confidence": 0.697 + }, + { + "text": "to", + "start": 38.34, + "end": 38.68, + "confidence": 0.952 + }, + { + "text": "reach", + "start": 38.68, + "end": 38.84, + "confidence": 0.185 + }, + { + "text": "these", + "start": 38.84, + "end": 39.12, + "confidence": 0.19 + }, + { + "text": "things,", + "start": 39.12, + "end": 39.62, + "confidence": 0.06 + }, + { + "text": "not", + "start": 39.62, + "end": 40.1, + "confidence": 0.865 + }, + { + "text": "to", + "start": 40.1, + "end": 40.22, + "confidence": 0.13 + }, + { + "text": "reach", + "start": 40.22, + "end": 40.36, + "confidence": 0.433 + }, + { + "text": "out", + "start": 40.36, + "end": 40.58, + "confidence": 0.376 + }, + { + "text": "to", + "start": 40.58, + "end": 40.72, + "confidence": 0.771 + }, + { + "text": "them", + "start": 40.72, + "end": 40.84, + "confidence": 0.296 + }, + { + "text": "and", + "start": 40.84, + "end": 40.88, + "confidence": 0.254 + }, + { + "text": "they're", + "start": 40.88, + "end": 41.1, + "confidence": 0.244 + }, + { + "text": "out", + "start": 41.1, + "end": 41.34, + "confidence": 0.162 + }, + { + "text": "of", + "start": 41.34, + "end": 41.58, + "confidence": 0.722 + }, + { + "text": "the", + "start": 41.58, + "end": 41.68, + "confidence": 0.562 + }, + { + "text": "bag", + "start": 41.68, + "end": 41.92, + "confidence": 0.277 + } + ] + }, + { + "id": 5, + "seek": 3100, + "start": 43.52, + "end": 47.08, + "text": " And I can't see anything else, I don't want to hear anything else", + "tokens": [ + 400, + 286, + 393, + 380, + 536, + 1340, + 1646, + 11, + 286, + 500, + 380, + 528, + 281, + 1568, + 1340, + 1646 + ], + "temperature": 0.0, + "avg_logprob": -1.0431414517489346, + "compression_ratio": 1.8074866310160427, + 
"no_speech_prob": 0.09527108818292618, + "confidence": 0.326, + "words": [ + { + "text": "And", + "start": 43.52, + "end": 43.56, + "confidence": 0.363 + }, + { + "text": "I", + "start": 43.56, + "end": 44.12, + "confidence": 0.396 + }, + { + "text": "can't", + "start": 44.12, + "end": 44.9, + "confidence": 0.248 + }, + { + "text": "see", + "start": 44.9, + "end": 45.08, + "confidence": 0.22 + }, + { + "text": "anything", + "start": 45.08, + "end": 45.64, + "confidence": 0.254 + }, + { + "text": "else,", + "start": 45.64, + "end": 45.68, + "confidence": 0.295 + }, + { + "text": "I", + "start": 45.68, + "end": 46.4, + "confidence": 0.191 + }, + { + "text": "don't", + "start": 46.4, + "end": 46.56, + "confidence": 0.615 + }, + { + "text": "want", + "start": 46.56, + "end": 46.6, + "confidence": 0.269 + }, + { + "text": "to", + "start": 46.6, + "end": 46.64, + "confidence": 0.744 + }, + { + "text": "hear", + "start": 46.64, + "end": 46.76, + "confidence": 0.262 + }, + { + "text": "anything", + "start": 46.76, + "end": 46.88, + "confidence": 0.287 + }, + { + "text": "else", + "start": 46.88, + "end": 47.08, + "confidence": 0.311 + } + ] + }, + { + "id": 6, + "seek": 3100, + "start": 48.24, + "end": 49.68, + "text": " I'm at the least, I'm not making the tag", + "tokens": [ + 286, + 478, + 412, + 264, + 1935, + 11, + 286, + 478, + 406, + 1455, + 264, + 6162 + ], + "temperature": 0.0, + "avg_logprob": -1.0431414517489346, + "compression_ratio": 1.8074866310160427, + "no_speech_prob": 0.09527108818292618, + "confidence": 0.379, + "words": [ + { + "text": "I'm", + "start": 48.24, + "end": 48.48, + "confidence": 0.231 + }, + { + "text": "at", + "start": 48.48, + "end": 48.52, + "confidence": 0.206 + }, + { + "text": "the", + "start": 48.52, + "end": 48.68, + "confidence": 0.811 + }, + { + "text": "least,", + "start": 48.68, + "end": 48.94, + "confidence": 0.235 + }, + { + "text": "I'm", + "start": 48.94, + "end": 49.0, + "confidence": 0.424 + }, + { + "text": "not", + 
"start": 49.0, + "end": 49.12, + "confidence": 0.938 + }, + { + "text": "making", + "start": 49.12, + "end": 49.26, + "confidence": 0.293 + }, + { + "text": "the", + "start": 49.26, + "end": 49.44, + "confidence": 0.747 + }, + { + "text": "tag", + "start": 49.44, + "end": 49.68, + "confidence": 0.298 + } + ] + }, + { + "id": 7, + "seek": 3100, + "start": 51.46, + "end": 52.88, + "text": " I can't see anything else", + "tokens": [ + 286, + 393, + 380, + 536, + 1340, + 1646 + ], + "temperature": 0.0, + "avg_logprob": -1.0431414517489346, + "compression_ratio": 1.8074866310160427, + "no_speech_prob": 0.09527108818292618, + "confidence": 0.412, + "words": [ + { + "text": "I", + "start": 51.46, + "end": 51.5, + "confidence": 0.173 + }, + { + "text": "can't", + "start": 51.5, + "end": 51.86, + "confidence": 0.374 + }, + { + "text": "see", + "start": 51.86, + "end": 52.2, + "confidence": 0.34 + }, + { + "text": "anything", + "start": 52.2, + "end": 52.5, + "confidence": 0.915 + }, + { + "text": "else", + "start": 52.5, + "end": 52.88, + "confidence": 0.653 + } + ] + }, + { + "id": 8, + "seek": 3100, + "start": 53.16, + "end": 54.7, + "text": " Yeah, I'm taking a heavy out of the bag", + "tokens": [ + 865, + 11, + 286, + 478, + 1940, + 257, + 4676, + 484, + 295, + 264, + 3411 + ], + "temperature": 0.0, + "avg_logprob": -1.0431414517489346, + "compression_ratio": 1.8074866310160427, + "no_speech_prob": 0.09527108818292618, + "confidence": 0.28, + "words": [ + { + "text": "Yeah,", + "start": 53.16, + "end": 53.38, + "confidence": 0.569 + }, + { + "text": "I'm", + "start": 53.38, + "end": 53.46, + "confidence": 0.59 + }, + { + "text": "taking", + "start": 53.46, + "end": 53.64, + "confidence": 0.881 + }, + { + "text": "a", + "start": 53.64, + "end": 53.76, + "confidence": 0.177 + }, + { + "text": "heavy", + "start": 53.76, + "end": 54.04, + "confidence": 0.1 + }, + { + "text": "out", + "start": 54.04, + "end": 54.28, + "confidence": 0.073 + }, + { + "text": "of", + "start": 
54.28, + "end": 54.42, + "confidence": 0.948 + }, + { + "text": "the", + "start": 54.42, + "end": 54.6, + "confidence": 0.168 + }, + { + "text": "bag", + "start": 54.6, + "end": 54.7, + "confidence": 0.084 + } + ] + }, + { + "id": 9, + "seek": 5600, + "start": 56.46, + "end": 59.78, + "text": " We're on the highway, we're on the highway, we're on the highway", + "tokens": [ + 492, + 434, + 322, + 264, + 17205, + 11, + 321, + 434, + 322, + 264, + 17205, + 11, + 321, + 434, + 322, + 264, + 17205 + ], + "temperature": 0.0, + "avg_logprob": -0.750541016295716, + "compression_ratio": 1.901840490797546, + "no_speech_prob": 0.01289769820868969, + "confidence": 0.498, + "words": [ + { + "text": "We're", + "start": 56.46, + "end": 56.94, + "confidence": 0.281 + }, + { + "text": "on", + "start": 56.94, + "end": 57.18, + "confidence": 0.116 + }, + { + "text": "the", + "start": 57.18, + "end": 57.26, + "confidence": 0.261 + }, + { + "text": "highway,", + "start": 57.26, + "end": 57.78, + "confidence": 0.129 + }, + { + "text": "we're", + "start": 57.78, + "end": 58.32, + "confidence": 0.604 + }, + { + "text": "on", + "start": 58.32, + "end": 58.44, + "confidence": 0.813 + }, + { + "text": "the", + "start": 58.44, + "end": 58.74, + "confidence": 0.861 + }, + { + "text": "highway,", + "start": 58.74, + "end": 58.94, + "confidence": 0.925 + }, + { + "text": "we're", + "start": 58.94, + "end": 58.98, + "confidence": 0.742 + }, + { + "text": "on", + "start": 58.98, + "end": 59.02, + "confidence": 0.82 + }, + { + "text": "the", + "start": 59.02, + "end": 59.18, + "confidence": 0.937 + }, + { + "text": "highway", + "start": 59.18, + "end": 59.78, + "confidence": 0.948 + } + ] + }, + { + "id": 10, + "seek": 5600, + "start": 60.52, + "end": 66.02, + "text": " We're on the highway, we're on the highway", + "tokens": [ + 492, + 434, + 322, + 264, + 17205, + 11, + 321, + 434, + 322, + 264, + 17205 + ], + "temperature": 0.0, + "avg_logprob": -0.750541016295716, + "compression_ratio": 
1.901840490797546, + "no_speech_prob": 0.01289769820868969, + "confidence": 0.785, + "words": [ + { + "text": "We're", + "start": 60.52, + "end": 60.66, + "confidence": 0.62 + }, + { + "text": "on", + "start": 60.66, + "end": 60.84, + "confidence": 0.437 + }, + { + "text": "the", + "start": 60.84, + "end": 60.88, + "confidence": 0.94 + }, + { + "text": "highway,", + "start": 60.88, + "end": 61.16, + "confidence": 0.977 + }, + { + "text": "we're", + "start": 61.16, + "end": 63.48, + "confidence": 0.865 + }, + { + "text": "on", + "start": 63.48, + "end": 63.62, + "confidence": 0.911 + }, + { + "text": "the", + "start": 63.62, + "end": 63.7, + "confidence": 0.892 + }, + { + "text": "highway", + "start": 63.7, + "end": 66.02, + "confidence": 0.945 + } + ] + }, + { + "id": 11, + "seek": 5600, + "start": 66.02, + "end": 69.04, + "text": " We want to hear that, this is the case, we know we can't get that much of our business", + "tokens": [ + 492, + 528, + 281, + 1568, + 300, + 11, + 341, + 307, + 264, + 1389, + 11, + 321, + 458, + 321, + 393, + 380, + 483, + 300, + 709, + 295, + 527, + 1606 + ], + "temperature": 0.0, + "avg_logprob": -0.750541016295716, + "compression_ratio": 1.901840490797546, + "no_speech_prob": 0.01289769820868969, + "confidence": 0.371, + "words": [ + { + "text": "We", + "start": 66.02, + "end": 66.06, + "confidence": 0.369 + }, + { + "text": "want", + "start": 66.06, + "end": 66.1, + "confidence": 0.335 + }, + { + "text": "to", + "start": 66.1, + "end": 66.14, + "confidence": 0.662 + }, + { + "text": "hear", + "start": 66.14, + "end": 66.18, + "confidence": 0.323 + }, + { + "text": "that,", + "start": 66.18, + "end": 66.22, + "confidence": 0.878 + }, + { + "text": "this", + "start": 66.22, + "end": 66.26, + "confidence": 0.265 + }, + { + "text": "is", + "start": 66.26, + "end": 66.3, + "confidence": 0.806 + }, + { + "text": "the", + "start": 66.3, + "end": 66.34, + "confidence": 0.286 + }, + { + "text": "case,", + "start": 66.34, + "end": 66.38, + 
"confidence": 0.195 + }, + { + "text": "we", + "start": 66.38, + "end": 66.92, + "confidence": 0.423 + }, + { + "text": "know", + "start": 66.92, + "end": 66.96, + "confidence": 0.736 + }, + { + "text": "we", + "start": 66.96, + "end": 67.06, + "confidence": 0.814 + }, + { + "text": "can't", + "start": 67.06, + "end": 67.3, + "confidence": 0.339 + }, + { + "text": "get", + "start": 67.3, + "end": 67.4, + "confidence": 0.219 + }, + { + "text": "that", + "start": 67.4, + "end": 67.64, + "confidence": 0.209 + }, + { + "text": "much", + "start": 67.64, + "end": 68.46, + "confidence": 0.213 + }, + { + "text": "of", + "start": 68.46, + "end": 68.7, + "confidence": 0.59 + }, + { + "text": "our", + "start": 68.7, + "end": 68.74, + "confidence": 0.679 + }, + { + "text": "business", + "start": 68.74, + "end": 69.04, + "confidence": 0.077 + } + ] + }, + { + "id": 12, + "seek": 5600, + "start": 69.04, + "end": 72.64, + "text": " So I guess we'll be somewhere in our place, but it's not over", + "tokens": [ + 407, + 286, + 2041, + 321, + 603, + 312, + 4079, + 294, + 527, + 1081, + 11, + 457, + 309, + 311, + 406, + 670 + ], + "temperature": 0.0, + "avg_logprob": -0.750541016295716, + "compression_ratio": 1.901840490797546, + "no_speech_prob": 0.01289769820868969, + "confidence": 0.435, + "words": [ + { + "text": "So", + "start": 69.04, + "end": 70.38, + "confidence": 0.736 + }, + { + "text": "I", + "start": 70.38, + "end": 70.52, + "confidence": 0.675 + }, + { + "text": "guess", + "start": 70.52, + "end": 70.68, + "confidence": 0.721 + }, + { + "text": "we'll", + "start": 70.68, + "end": 71.04, + "confidence": 0.462 + }, + { + "text": "be", + "start": 71.04, + "end": 71.22, + "confidence": 0.27 + }, + { + "text": "somewhere", + "start": 71.22, + "end": 71.36, + "confidence": 0.369 + }, + { + "text": "in", + "start": 71.36, + "end": 71.4, + "confidence": 0.107 + }, + { + "text": "our", + "start": 71.4, + "end": 71.5, + "confidence": 0.192 + }, + { + "text": "place,", + "start": 
71.5, + "end": 71.92, + "confidence": 0.38 + }, + { + "text": "but", + "start": 71.92, + "end": 72.14, + "confidence": 0.431 + }, + { + "text": "it's", + "start": 72.14, + "end": 72.36, + "confidence": 0.939 + }, + { + "text": "not", + "start": 72.36, + "end": 72.4, + "confidence": 0.552 + }, + { + "text": "over", + "start": 72.4, + "end": 72.64, + "confidence": 0.308 + } + ] + }, + { + "id": 13, + "seek": 7400, + "start": 74.02, + "end": 78.4, + "text": " Okay, it's not over, it's not over, it's not over", + "tokens": [ + 50364, + 1033, + 11, + 309, + 311, + 406, + 670, + 11, + 309, + 311, + 406, + 670, + 11, + 309, + 311, + 406, + 670, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.405017099882427, + "compression_ratio": 1.6896551724137931, + "no_speech_prob": 0.01679050177335739, + "confidence": 0.705, + "words": [ + { + "text": "Okay,", + "start": 74.02, + "end": 75.9, + "confidence": 0.147 + }, + { + "text": "it's", + "start": 75.9, + "end": 76.16, + "confidence": 0.589 + }, + { + "text": "not", + "start": 76.16, + "end": 76.22, + "confidence": 0.956 + }, + { + "text": "over,", + "start": 76.22, + "end": 76.76, + "confidence": 0.89 + }, + { + "text": "it's", + "start": 76.76, + "end": 77.08, + "confidence": 0.647 + }, + { + "text": "not", + "start": 77.08, + "end": 77.18, + "confidence": 0.897 + }, + { + "text": "over,", + "start": 77.18, + "end": 77.36, + "confidence": 0.962 + }, + { + "text": "it's", + "start": 77.36, + "end": 77.86, + "confidence": 0.869 + }, + { + "text": "not", + "start": 77.86, + "end": 78.26, + "confidence": 0.955 + }, + { + "text": "over", + "start": 78.26, + "end": 78.4, + "confidence": 0.934 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/large_apollo11.mp3.words.json b/tests/expected/corner_cases.cpu/large_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..2132c86c44928d8182b1d362dfc9798acda75397 --- /dev/null +++ 
b/tests/expected/corner_cases.cpu/large_apollo11.mp3.words.json @@ -0,0 +1,2212 @@ +{ + "text": " Apollo 11, Houston, we got a recommendation for you on your Doige's EAs, LM EG EAs, over. Go ahead. We'd like to see it as a selected one or two on the helmet. We're gonna have it in B1, and you can put the other one on Mike's helmet, we're still seeing it. Okay. All right. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. We got some. Yeah, we're taking next day autumn to heal up. You won't hack me on this. You won't hack me on this. With a cover, I tried it already. Okay, fine. We weren't sure that this was a suggestion. We thought you could check it out. I'm not sure we've already done that. I guess, whatever you come up with, just let us know. Okay, no problem. 
Okay, no problem.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.52, + "end": 7.22, + "text": " Apollo 11, Houston, we got a recommendation for you on your Doige's EAs, LM EG EAs, over.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 11, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 1144, + 328, + 68, + 311, + 462, + 10884, + 11, + 46529, + 462, + 38, + 462, + 10884, + 11, + 670, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.6255288836599766, + "compression_ratio": 1.4371584699453552, + "no_speech_prob": 0.33187487721443176, + "confidence": 0.539, + "words": [ + { + "text": "Apollo", + "start": 0.52, + "end": 0.98, + "confidence": 0.791 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.878 + }, + { + "text": "Houston,", + "start": 1.54, + "end": 1.98, + "confidence": 0.828 + }, + { + "text": "we", + "start": 1.98, + "end": 2.02, + "confidence": 0.892 + }, + { + "text": "got", + "start": 2.02, + "end": 2.16, + "confidence": 0.688 + }, + { + "text": "a", + "start": 2.16, + "end": 2.46, + "confidence": 0.995 + }, + { + "text": "recommendation", + "start": 2.46, + "end": 3.16, + "confidence": 0.71 + }, + { + "text": "for", + "start": 3.16, + "end": 3.6, + "confidence": 0.845 + }, + { + "text": "you", + "start": 3.6, + "end": 3.94, + "confidence": 0.86 + }, + { + "text": "on", + "start": 3.94, + "end": 3.98, + "confidence": 0.862 + }, + { + "text": "your", + "start": 3.98, + "end": 4.28, + "confidence": 0.767 + }, + { + "text": "Doige's", + "start": 4.28, + "end": 5.14, + "confidence": 0.261 + }, + { + "text": "EAs,", + "start": 5.14, + "end": 5.62, + "confidence": 0.347 + }, + { + "text": "LM", + "start": 5.62, + "end": 5.7, + "confidence": 0.275 + }, + { + "text": "EG", + "start": 5.7, + "end": 6.34, + "confidence": 0.3 + }, + { + "text": "EAs,", + "start": 6.34, + "end": 6.98, + "confidence": 0.695 + }, + { + "text": "over.", + "start": 6.98, + "end": 7.22, + "confidence": 0.838 + } + ] + }, + { + "id": 1, + 
"seek": 0, + "start": 10.84, + "end": 11.96, + "text": " Go ahead.", + "tokens": [ + 1037, + 2286, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.6255288836599766, + "compression_ratio": 1.4371584699453552, + "no_speech_prob": 0.33187487721443176, + "confidence": 0.788, + "words": [ + { + "text": "Go", + "start": 10.84, + "end": 11.06, + "confidence": 0.719 + }, + { + "text": "ahead.", + "start": 11.06, + "end": 11.96, + "confidence": 0.863 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.76, + "end": 17.48, + "text": " We'd like to see it as a selected one or two on the helmet.", + "tokens": [ + 492, + 1116, + 411, + 281, + 536, + 309, + 382, + 257, + 8209, + 472, + 420, + 732, + 322, + 264, + 15922, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.6255288836599766, + "compression_ratio": 1.4371584699453552, + "no_speech_prob": 0.33187487721443176, + "confidence": 0.359, + "words": [ + { + "text": "We'd", + "start": 12.76, + "end": 13.28, + "confidence": 0.31 + }, + { + "text": "like", + "start": 13.28, + "end": 13.62, + "confidence": 0.874 + }, + { + "text": "to", + "start": 13.62, + "end": 13.84, + "confidence": 0.728 + }, + { + "text": "see", + "start": 13.84, + "end": 14.14, + "confidence": 0.214 + }, + { + "text": "it", + "start": 14.14, + "end": 14.56, + "confidence": 0.067 + }, + { + "text": "as", + "start": 14.56, + "end": 14.6, + "confidence": 0.108 + }, + { + "text": "a", + "start": 14.6, + "end": 14.64, + "confidence": 0.223 + }, + { + "text": "selected", + "start": 14.64, + "end": 15.78, + "confidence": 0.136 + }, + { + "text": "one", + "start": 15.78, + "end": 16.14, + "confidence": 0.786 + }, + { + "text": "or", + "start": 16.14, + "end": 16.32, + "confidence": 0.644 + }, + { + "text": "two", + "start": 16.32, + "end": 16.6, + "confidence": 0.333 + }, + { + "text": "on", + "start": 16.6, + "end": 16.64, + "confidence": 0.858 + }, + { + "text": "the", + "start": 16.64, + "end": 16.98, + "confidence": 0.801 + }, + { + "text": "helmet.", + 
"start": 16.98, + "end": 17.48, + "confidence": 0.626 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 17.52, + "end": 24.34, + "text": " We're gonna have it in B1, and you can put the other one on Mike's helmet, we're still seeing it.", + "tokens": [ + 492, + 434, + 799, + 362, + 309, + 294, + 363, + 16, + 11, + 293, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 6602, + 311, + 15922, + 11, + 321, + 434, + 920, + 2577, + 309, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.6255288836599766, + "compression_ratio": 1.4371584699453552, + "no_speech_prob": 0.33187487721443176, + "confidence": 0.646, + "words": [ + { + "text": "We're", + "start": 17.52, + "end": 17.94, + "confidence": 0.673 + }, + { + "text": "gonna", + "start": 17.94, + "end": 18.12, + "confidence": 0.19 + }, + { + "text": "have", + "start": 18.12, + "end": 18.3, + "confidence": 0.848 + }, + { + "text": "it", + "start": 18.3, + "end": 18.4, + "confidence": 0.317 + }, + { + "text": "in", + "start": 18.4, + "end": 18.48, + "confidence": 0.667 + }, + { + "text": "B1,", + "start": 18.48, + "end": 20.24, + "confidence": 0.883 + }, + { + "text": "and", + "start": 20.24, + "end": 20.28, + "confidence": 0.824 + }, + { + "text": "you", + "start": 20.28, + "end": 20.38, + "confidence": 0.892 + }, + { + "text": "can", + "start": 20.38, + "end": 20.56, + "confidence": 0.513 + }, + { + "text": "put", + "start": 20.56, + "end": 20.74, + "confidence": 0.895 + }, + { + "text": "the", + "start": 20.74, + "end": 20.9, + "confidence": 0.812 + }, + { + "text": "other", + "start": 20.9, + "end": 21.12, + "confidence": 0.773 + }, + { + "text": "one", + "start": 21.12, + "end": 21.42, + "confidence": 0.778 + }, + { + "text": "on", + "start": 21.42, + "end": 21.82, + "confidence": 0.905 + }, + { + "text": "Mike's", + "start": 21.82, + "end": 22.96, + "confidence": 0.876 + }, + { + "text": "helmet,", + "start": 22.96, + "end": 23.12, + "confidence": 0.772 + }, + { + "text": "we're", + "start": 23.12, + "end": 
23.26, + "confidence": 0.455 + }, + { + "text": "still", + "start": 23.26, + "end": 23.42, + "confidence": 0.773 + }, + { + "text": "seeing", + "start": 23.42, + "end": 23.72, + "confidence": 0.667 + }, + { + "text": "it.", + "start": 23.72, + "end": 24.34, + "confidence": 0.243 + } + ] + }, + { + "id": 4, + "seek": 2400, + "start": 25.44, + "end": 25.48, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.004, + "words": [ + { + "text": "Okay.", + "start": 25.44, + "end": 25.48, + "confidence": 0.004 + } + ] + }, + { + "id": 5, + "seek": 2400, + "start": 25.48, + "end": 26.0, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.281, + "words": [ + { + "text": "All", + "start": 25.48, + "end": 25.52, + "confidence": 0.093 + }, + { + "text": "right.", + "start": 25.52, + "end": 26.0, + "confidence": 0.849 + } + ] + }, + { + "id": 6, + "seek": 2400, + "start": 26.0, + "end": 27.5, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.06, + "words": [ + { + "text": "We", + "start": 26.0, + "end": 27.42, + "confidence": 0.08 + }, + { + "text": "got", + "start": 27.42, + "end": 27.46, + "confidence": 0.111 + }, + { + "text": "some.", + "start": 27.46, + "end": 27.5, + "confidence": 0.024 + } + ] + }, + { + "id": 7, + "seek": 2400, + "start": 28.4, + "end": 28.52, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.399, + "words": [ + { + 
"text": "We", + "start": 28.4, + "end": 28.44, + "confidence": 0.19 + }, + { + "text": "got", + "start": 28.44, + "end": 28.48, + "confidence": 0.527 + }, + { + "text": "some.", + "start": 28.48, + "end": 28.52, + "confidence": 0.633 + } + ] + }, + { + "id": 8, + "seek": 2400, + "start": 28.52, + "end": 28.96, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.468, + "words": [ + { + "text": "We", + "start": 28.52, + "end": 28.56, + "confidence": 0.231 + }, + { + "text": "got", + "start": 28.56, + "end": 28.6, + "confidence": 0.608 + }, + { + "text": "some.", + "start": 28.6, + "end": 28.96, + "confidence": 0.731 + } + ] + }, + { + "id": 9, + "seek": 2400, + "start": 28.96, + "end": 30.01, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.529, + "words": [ + { + "text": "We", + "start": 28.96, + "end": 29.0, + "confidence": 0.288 + }, + { + "text": "got", + "start": 29.0, + "end": 29.04, + "confidence": 0.675 + }, + { + "text": "some.", + "start": 29.04, + "end": 30.01, + "confidence": 0.761 + } + ] + }, + { + "id": 10, + "seek": 2400, + "start": 30.01, + "end": 31.5, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.596, + "words": [ + { + "text": "We", + "start": 30.01, + "end": 31.42, + "confidence": 0.378 + }, + { + "text": "got", + "start": 31.42, + "end": 31.46, + "confidence": 0.721 + }, + { + "text": "some.", + "start": 31.46, + "end": 31.5, + "confidence": 0.776 + } + ] + }, + { + "id": 11, + "seek": 2400, + "start": 31.5, + "end": 32.48, + "text": " We got 
some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.654, + "words": [ + { + "text": "We", + "start": 31.5, + "end": 32.14, + "confidence": 0.47 + }, + { + "text": "got", + "start": 32.14, + "end": 32.18, + "confidence": 0.757 + }, + { + "text": "some.", + "start": 32.18, + "end": 32.48, + "confidence": 0.787 + } + ] + }, + { + "id": 12, + "seek": 2400, + "start": 32.76, + "end": 33.26, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.707, + "words": [ + { + "text": "We", + "start": 32.76, + "end": 33.0, + "confidence": 0.573 + }, + { + "text": "got", + "start": 33.0, + "end": 33.22, + "confidence": 0.786 + }, + { + "text": "some.", + "start": 33.22, + "end": 33.26, + "confidence": 0.786 + } + ] + }, + { + "id": 13, + "seek": 2400, + "start": 33.26, + "end": 34.26, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.743, + "words": [ + { + "text": "We", + "start": 33.26, + "end": 33.3, + "confidence": 0.637 + }, + { + "text": "got", + "start": 33.3, + "end": 33.58, + "confidence": 0.811 + }, + { + "text": "some.", + "start": 33.58, + "end": 34.26, + "confidence": 0.794 + } + ] + }, + { + "id": 14, + "seek": 2400, + "start": 34.52, + "end": 35.06, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.761, + "words": [ + { + "text": "We", + "start": 34.52, + "end": 34.68, + "confidence": 0.672 + }, + { + "text": "got", + 
"start": 34.68, + "end": 35.02, + "confidence": 0.828 + }, + { + "text": "some.", + "start": 35.02, + "end": 35.06, + "confidence": 0.793 + } + ] + }, + { + "id": 15, + "seek": 2400, + "start": 35.18, + "end": 36.03, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.774, + "words": [ + { + "text": "We", + "start": 35.18, + "end": 35.48, + "confidence": 0.694 + }, + { + "text": "got", + "start": 35.48, + "end": 35.78, + "confidence": 0.843 + }, + { + "text": "some.", + "start": 35.78, + "end": 36.03, + "confidence": 0.792 + } + ] + }, + { + "id": 16, + "seek": 2400, + "start": 36.03, + "end": 36.16, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.787, + "words": [ + { + "text": "We", + "start": 36.03, + "end": 36.07, + "confidence": 0.714 + }, + { + "text": "got", + "start": 36.07, + "end": 36.11, + "confidence": 0.853 + }, + { + "text": "some.", + "start": 36.11, + "end": 36.16, + "confidence": 0.8 + } + ] + }, + { + "id": 17, + "seek": 2400, + "start": 37.66, + "end": 38.48, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.798, + "words": [ + { + "text": "We", + "start": 37.66, + "end": 38.0, + "confidence": 0.741 + }, + { + "text": "got", + "start": 38.0, + "end": 38.16, + "confidence": 0.86 + }, + { + "text": "some.", + "start": 38.16, + "end": 38.48, + "confidence": 0.797 + } + ] + }, + { + "id": 18, + "seek": 2400, + "start": 38.48, + "end": 39.2, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": 
-0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.798, + "words": [ + { + "text": "We", + "start": 38.48, + "end": 38.52, + "confidence": 0.739 + }, + { + "text": "got", + "start": 38.52, + "end": 38.56, + "confidence": 0.861 + }, + { + "text": "some.", + "start": 38.56, + "end": 39.2, + "confidence": 0.799 + } + ] + }, + { + "id": 19, + "seek": 2400, + "start": 40.16, + "end": 40.48, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.802, + "words": [ + { + "text": "We", + "start": 40.16, + "end": 40.2, + "confidence": 0.752 + }, + { + "text": "got", + "start": 40.2, + "end": 40.44, + "confidence": 0.862 + }, + { + "text": "some.", + "start": 40.44, + "end": 40.48, + "confidence": 0.797 + } + ] + }, + { + "id": 20, + "seek": 2400, + "start": 40.48, + "end": 40.68, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.802, + "words": [ + { + "text": "We", + "start": 40.48, + "end": 40.52, + "confidence": 0.751 + }, + { + "text": "got", + "start": 40.52, + "end": 40.56, + "confidence": 0.865 + }, + { + "text": "some.", + "start": 40.56, + "end": 40.68, + "confidence": 0.795 + } + ] + }, + { + "id": 21, + "seek": 2400, + "start": 41.36, + "end": 41.96, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.812, + "words": [ + { + "text": "We", + "start": 41.36, + "end": 41.4, + "confidence": 0.771 + }, + { + "text": "got", + "start": 41.4, + "end": 41.44, + "confidence": 0.871 + }, + { + "text": "some.", + "start": 
41.44, + "end": 41.96, + "confidence": 0.798 + } + ] + }, + { + "id": 22, + "seek": 2400, + "start": 41.96, + "end": 42.08, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.811, + "words": [ + { + "text": "We", + "start": 41.96, + "end": 42.0, + "confidence": 0.766 + }, + { + "text": "got", + "start": 42.0, + "end": 42.04, + "confidence": 0.873 + }, + { + "text": "some.", + "start": 42.04, + "end": 42.08, + "confidence": 0.797 + } + ] + }, + { + "id": 23, + "seek": 2400, + "start": 43.84, + "end": 44.48, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.821, + "words": [ + { + "text": "We", + "start": 43.84, + "end": 44.02, + "confidence": 0.782 + }, + { + "text": "got", + "start": 44.02, + "end": 44.28, + "confidence": 0.876 + }, + { + "text": "some.", + "start": 44.28, + "end": 44.48, + "confidence": 0.806 + } + ] + }, + { + "id": 24, + "seek": 2400, + "start": 44.56, + "end": 45.03, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.827, + "words": [ + { + "text": "We", + "start": 44.56, + "end": 44.6, + "confidence": 0.799 + }, + { + "text": "got", + "start": 44.6, + "end": 44.64, + "confidence": 0.88 + }, + { + "text": "some.", + "start": 44.64, + "end": 45.03, + "confidence": 0.804 + } + ] + }, + { + "id": 25, + "seek": 2400, + "start": 45.03, + "end": 45.15, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + 
"confidence": 0.828, + "words": [ + { + "text": "We", + "start": 45.03, + "end": 45.07, + "confidence": 0.802 + }, + { + "text": "got", + "start": 45.07, + "end": 45.11, + "confidence": 0.881 + }, + { + "text": "some.", + "start": 45.11, + "end": 45.15, + "confidence": 0.802 + } + ] + }, + { + "id": 26, + "seek": 2400, + "start": 46.24, + "end": 47.08, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.826, + "words": [ + { + "text": "We", + "start": 46.24, + "end": 46.44, + "confidence": 0.796 + }, + { + "text": "got", + "start": 46.44, + "end": 46.8, + "confidence": 0.883 + }, + { + "text": "some.", + "start": 46.8, + "end": 47.08, + "confidence": 0.801 + } + ] + }, + { + "id": 27, + "seek": 2400, + "start": 47.96, + "end": 48.08, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.827, + "words": [ + { + "text": "We", + "start": 47.96, + "end": 48.0, + "confidence": 0.805 + }, + { + "text": "got", + "start": 48.0, + "end": 48.04, + "confidence": 0.886 + }, + { + "text": "some.", + "start": 48.04, + "end": 48.08, + "confidence": 0.795 + } + ] + }, + { + "id": 28, + "seek": 2400, + "start": 48.3, + "end": 48.74, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.829, + "words": [ + { + "text": "We", + "start": 48.3, + "end": 48.6, + "confidence": 0.801 + }, + { + "text": "got", + "start": 48.6, + "end": 48.66, + "confidence": 0.887 + }, + { + "text": "some.", + "start": 48.66, + "end": 48.74, + "confidence": 0.802 + } + ] + }, + { + "id": 29, + "seek": 2400, + 
"start": 50.16, + "end": 50.5, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.832, + "words": [ + { + "text": "We", + "start": 50.16, + "end": 50.42, + "confidence": 0.809 + }, + { + "text": "got", + "start": 50.42, + "end": 50.46, + "confidence": 0.888 + }, + { + "text": "some.", + "start": 50.46, + "end": 50.5, + "confidence": 0.804 + } + ] + }, + { + "id": 30, + "seek": 2400, + "start": 50.5, + "end": 51.36, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.832, + "words": [ + { + "text": "We", + "start": 50.5, + "end": 51.28, + "confidence": 0.808 + }, + { + "text": "got", + "start": 51.28, + "end": 51.32, + "confidence": 0.885 + }, + { + "text": "some.", + "start": 51.32, + "end": 51.36, + "confidence": 0.804 + } + ] + }, + { + "id": 31, + "seek": 2400, + "start": 51.36, + "end": 51.94, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.832, + "words": [ + { + "text": "We", + "start": 51.36, + "end": 51.86, + "confidence": 0.811 + }, + { + "text": "got", + "start": 51.86, + "end": 51.9, + "confidence": 0.885 + }, + { + "text": "some.", + "start": 51.9, + "end": 51.94, + "confidence": 0.803 + } + ] + }, + { + "id": 32, + "seek": 2400, + "start": 51.94, + "end": 52.86, + "text": " We got some.", + "tokens": [ + 492, + 658, + 512, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.3070566034041388, + "compression_ratio": 9.175, + "no_speech_prob": 0.6738554835319519, + "confidence": 0.822, + "words": [ + { + "text": "We", + "start": 51.94, + "end": 51.98, + 
"confidence": 0.781 + }, + { + "text": "got", + "start": 51.98, + "end": 52.32, + "confidence": 0.887 + }, + { + "text": "some.", + "start": 52.32, + "end": 52.86, + "confidence": 0.8 + } + ] + }, + { + "id": 33, + "seek": 5300, + "start": 53.16, + "end": 56.06, + "text": " Yeah, we're taking next day autumn to heal up.", + "tokens": [ + 865, + 11, + 321, + 434, + 1940, + 958, + 786, + 24604, + 281, + 10526, + 493, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.348, + "words": [ + { + "text": "Yeah,", + "start": 53.16, + "end": 53.5, + "confidence": 0.243 + }, + { + "text": "we're", + "start": 53.5, + "end": 53.62, + "confidence": 0.597 + }, + { + "text": "taking", + "start": 53.62, + "end": 53.7, + "confidence": 0.694 + }, + { + "text": "next", + "start": 53.7, + "end": 53.98, + "confidence": 0.247 + }, + { + "text": "day", + "start": 53.98, + "end": 54.2, + "confidence": 0.555 + }, + { + "text": "autumn", + "start": 54.2, + "end": 54.4, + "confidence": 0.159 + }, + { + "text": "to", + "start": 54.4, + "end": 54.64, + "confidence": 0.186 + }, + { + "text": "heal", + "start": 54.64, + "end": 55.18, + "confidence": 0.129 + }, + { + "text": "up.", + "start": 55.18, + "end": 56.06, + "confidence": 0.817 + } + ] + }, + { + "id": 34, + "seek": 5300, + "start": 56.46, + "end": 57.86, + "text": " You won't hack me on this.", + "tokens": [ + 509, + 1582, + 380, + 10339, + 385, + 322, + 341, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.432, + "words": [ + { + "text": "You", + "start": 56.46, + "end": 56.8, + "confidence": 0.117 + }, + { + "text": "won't", + "start": 56.8, + "end": 57.2, + "confidence": 0.465 + }, + { + "text": "hack", + "start": 57.2, + "end": 57.46, + "confidence": 0.373 + }, + { + "text": "me", + "start": 
57.46, + "end": 57.66, + "confidence": 0.898 + }, + { + "text": "on", + "start": 57.66, + "end": 57.82, + "confidence": 0.807 + }, + { + "text": "this.", + "start": 57.82, + "end": 57.86, + "confidence": 0.413 + } + ] + }, + { + "id": 35, + "seek": 5300, + "start": 57.86, + "end": 59.79, + "text": " You won't hack me on this.", + "tokens": [ + 509, + 1582, + 380, + 10339, + 385, + 322, + 341, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.832, + "words": [ + { + "text": "You", + "start": 57.86, + "end": 58.08, + "confidence": 0.612 + }, + { + "text": "won't", + "start": 58.08, + "end": 58.76, + "confidence": 0.928 + }, + { + "text": "hack", + "start": 58.76, + "end": 58.8, + "confidence": 0.809 + }, + { + "text": "me", + "start": 58.8, + "end": 59.2, + "confidence": 0.905 + }, + { + "text": "on", + "start": 59.2, + "end": 59.3, + "confidence": 0.884 + }, + { + "text": "this.", + "start": 59.3, + "end": 59.79, + "confidence": 0.808 + } + ] + }, + { + "id": 36, + "seek": 5300, + "start": 59.79, + "end": 62.08, + "text": " With a cover, I tried it already.", + "tokens": [ + 2022, + 257, + 2060, + 11, + 286, + 3031, + 309, + 1217, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.638, + "words": [ + { + "text": "With", + "start": 59.79, + "end": 60.1, + "confidence": 0.363 + }, + { + "text": "a", + "start": 60.1, + "end": 60.14, + "confidence": 0.405 + }, + { + "text": "cover,", + "start": 60.14, + "end": 61.24, + "confidence": 0.814 + }, + { + "text": "I", + "start": 61.24, + "end": 61.28, + "confidence": 0.961 + }, + { + "text": "tried", + "start": 61.28, + "end": 61.56, + "confidence": 0.653 + }, + { + "text": "it", + "start": 61.56, + "end": 61.74, + "confidence": 0.73 + }, + { + "text": "already.", + "start": 61.74, + 
"end": 62.08, + "confidence": 0.785 + } + ] + }, + { + "id": 37, + "seek": 5300, + "start": 62.6, + "end": 65.32, + "text": " Okay, fine. We weren't sure that this was a suggestion.", + "tokens": [ + 1033, + 11, + 2489, + 13, + 492, + 4999, + 380, + 988, + 300, + 341, + 390, + 257, + 16541, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.722, + "words": [ + { + "text": "Okay,", + "start": 62.6, + "end": 62.92, + "confidence": 0.513 + }, + { + "text": "fine.", + "start": 62.92, + "end": 63.2, + "confidence": 0.861 + }, + { + "text": "We", + "start": 63.2, + "end": 63.36, + "confidence": 0.702 + }, + { + "text": "weren't", + "start": 63.36, + "end": 63.68, + "confidence": 0.909 + }, + { + "text": "sure", + "start": 63.68, + "end": 63.84, + "confidence": 0.829 + }, + { + "text": "that", + "start": 63.84, + "end": 64.24, + "confidence": 0.552 + }, + { + "text": "this", + "start": 64.24, + "end": 64.64, + "confidence": 0.741 + }, + { + "text": "was", + "start": 64.64, + "end": 64.8, + "confidence": 0.783 + }, + { + "text": "a", + "start": 64.8, + "end": 64.88, + "confidence": 0.819 + }, + { + "text": "suggestion.", + "start": 64.88, + "end": 65.32, + "confidence": 0.498 + } + ] + }, + { + "id": 38, + "seek": 5300, + "start": 66.76, + "end": 68.21, + "text": " We thought you could check it out.", + "tokens": [ + 492, + 1194, + 291, + 727, + 1520, + 309, + 484, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.721, + "words": [ + { + "text": "We", + "start": 66.76, + "end": 66.8, + "confidence": 0.891 + }, + { + "text": "thought", + "start": 66.8, + "end": 66.84, + "confidence": 0.813 + }, + { + "text": "you", + "start": 66.84, + "end": 67.06, + "confidence": 0.325 + }, + { + "text": "could", + "start": 67.06, + "end": 67.24, + 
"confidence": 0.784 + }, + { + "text": "check", + "start": 67.24, + "end": 67.5, + "confidence": 0.751 + }, + { + "text": "it", + "start": 67.5, + "end": 67.7, + "confidence": 0.899 + }, + { + "text": "out.", + "start": 67.7, + "end": 68.21, + "confidence": 0.812 + } + ] + }, + { + "id": 39, + "seek": 5300, + "start": 68.21, + "end": 69.48, + "text": " I'm not sure we've already done that.", + "tokens": [ + 286, + 478, + 406, + 988, + 321, + 600, + 1217, + 1096, + 300, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.455, + "words": [ + { + "text": "I'm", + "start": 68.21, + "end": 68.5, + "confidence": 0.179 + }, + { + "text": "not", + "start": 68.5, + "end": 68.6, + "confidence": 0.258 + }, + { + "text": "sure", + "start": 68.6, + "end": 68.76, + "confidence": 0.761 + }, + { + "text": "we've", + "start": 68.76, + "end": 68.9, + "confidence": 0.495 + }, + { + "text": "already", + "start": 68.9, + "end": 68.94, + "confidence": 0.765 + }, + { + "text": "done", + "start": 68.94, + "end": 69.18, + "confidence": 0.799 + }, + { + "text": "that.", + "start": 69.18, + "end": 69.48, + "confidence": 0.878 + } + ] + }, + { + "id": 40, + "seek": 5300, + "start": 70.38, + "end": 72.62, + "text": " I guess, whatever you come up with, just let us know.", + "tokens": [ + 286, + 2041, + 11, + 2035, + 291, + 808, + 493, + 365, + 11, + 445, + 718, + 505, + 458, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.738, + "words": [ + { + "text": "I", + "start": 70.38, + "end": 70.52, + "confidence": 0.55 + }, + { + "text": "guess,", + "start": 70.52, + "end": 70.96, + "confidence": 0.903 + }, + { + "text": "whatever", + "start": 70.96, + "end": 71.18, + "confidence": 0.404 + }, + { + "text": "you", + "start": 71.18, + "end": 71.22, + "confidence": 
0.89 + }, + { + "text": "come", + "start": 71.22, + "end": 71.54, + "confidence": 0.544 + }, + { + "text": "up", + "start": 71.54, + "end": 71.72, + "confidence": 0.892 + }, + { + "text": "with,", + "start": 71.72, + "end": 71.98, + "confidence": 0.816 + }, + { + "text": "just", + "start": 71.98, + "end": 72.16, + "confidence": 0.77 + }, + { + "text": "let", + "start": 72.16, + "end": 72.2, + "confidence": 0.898 + }, + { + "text": "us", + "start": 72.2, + "end": 72.34, + "confidence": 0.905 + }, + { + "text": "know.", + "start": 72.34, + "end": 72.62, + "confidence": 0.798 + } + ] + }, + { + "id": 41, + "seek": 5300, + "start": 74.18, + "end": 75.48, + "text": " Okay, no problem.", + "tokens": [ + 1033, + 11, + 572, + 1154, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.699, + "words": [ + { + "text": "Okay,", + "start": 74.18, + "end": 74.66, + "confidence": 0.789 + }, + { + "text": "no", + "start": 74.66, + "end": 74.88, + "confidence": 0.548 + }, + { + "text": "problem.", + "start": 74.88, + "end": 75.48, + "confidence": 0.79 + } + ] + }, + { + "id": 42, + "seek": 7544, + "start": 75.48, + "end": 76.86, + "text": " Okay, no problem.", + "tokens": [ + 50364, + 1033, + 11, + 572, + 1154, + 13, + 50464 + ], + "temperature": 0.4, + "avg_logprob": -0.7493470907211304, + "compression_ratio": 0.68, + "no_speech_prob": 0.06937140226364136, + "confidence": 0.521, + "words": [ + { + "text": "Okay,", + "start": 75.48, + "end": 76.28, + "confidence": 0.386 + }, + { + "text": "no", + "start": 76.28, + "end": 76.32, + "confidence": 0.472 + }, + { + "text": "problem.", + "start": 76.32, + "end": 76.86, + "confidence": 0.775 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/nocond.random_music.mp4.words.json b/tests/expected/corner_cases.cpu/nocond.random_music.mp4.words.json new file mode 
100644 index 0000000000000000000000000000000000000000..cffe4045f2a305e00267c28fb344c5011039e8b1 --- /dev/null +++ b/tests/expected/corner_cases.cpu/nocond.random_music.mp4.words.json @@ -0,0 +1,3209 @@ +{ + "text": " I Oh, the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself But I'm ready Your words up on the wall You don't need to say I'm ready your words up on the wall as you're praying for my phone and the laughter in the holes and the names that I've I stack it in my mind and I'm waiting for the time When I show you what it's like to be worse than a mind Tell you you're the greatest But once you turn they hate us Oh, the misery Everybody wants to be my enemy I smell the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself Look, okay I'm hoping that somebody pray for me I'm praying that somebody vote for me I'm staying where nobody's supposed to be I propose to be in a rink of emotions Ready to go whenever you let me know The road is long so put the pedal into the flow The enemy on my trail, my energy unavailable I'm a tell a monster the way go Way when the plot on my track to the top I been out of shape thinking that I'm a box I'm an astronaut Blast it off the planet rock the cause catastrophe And it matters more because ahead and not ahead I thought about wreaking havoc on an opposition Kinda shockin' they want it static with precision I'm automatic quarterback I ain't talkin' second Pack it pack it up I don't panic Better batter up who the baddest It don't matter cause we it's your Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Oh the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy I swear, I swear I'll never be a saint I swear, my enemy I swear, I swear I'll never be a saint You got to be yourself!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 
0.02, + "end": 0.4, + "text": " I", + "tokens": [ + 50364, + 286, + 50464 + ], + "temperature": 0.4, + "avg_logprob": -0.9367842674255371, + "compression_ratio": 0.1111111111111111, + "no_speech_prob": 0.7794302701950073, + "confidence": 0.032, + "words": [ + { + "text": "I", + "start": 0.02, + "end": 0.4, + "confidence": 0.032 + } + ] + }, + { + "id": 1, + "seek": 6000, + "start": 60.02, + "end": 68.54, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.669, + "words": [ + { + "text": "Oh,", + "start": 60.02, + "end": 65.44, + "confidence": 0.084 + }, + { + "text": "the", + "start": 65.44, + "end": 65.48, + "confidence": 0.796 + }, + { + "text": "misery", + "start": 65.48, + "end": 66.08, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 66.08, + "end": 67.62, + "confidence": 0.431 + }, + { + "text": "wants", + "start": 67.62, + "end": 68.0, + "confidence": 0.983 + }, + { + "text": "to", + "start": 68.0, + "end": 68.18, + "confidence": 0.993 + }, + { + "text": "be", + "start": 68.18, + "end": 68.38, + "confidence": 0.996 + }, + { + "text": "my", + "start": 68.38, + "end": 68.5, + "confidence": 0.984 + }, + { + "text": "enemy", + "start": 68.5, + "end": 68.54, + "confidence": 0.984 + } + ] + }, + { + "id": 2, + "seek": 6000, + "start": 70.9, + "end": 75.92, + "text": " Spare the sympathy Everybody wants to be my enemy", + "tokens": [ + 1738, + 543, + 264, + 33240, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.932, + "words": [ + { + "text": "Spare", + "start": 70.9, + "end": 71.48, + "confidence": 0.789 + }, + { + "text": "the", + 
"start": 71.48, + "end": 71.7, + "confidence": 0.97 + }, + { + "text": "sympathy", + "start": 71.7, + "end": 72.56, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 72.56, + "end": 73.86, + "confidence": 0.959 + }, + { + "text": "wants", + "start": 73.86, + "end": 74.26, + "confidence": 0.994 + }, + { + "text": "to", + "start": 74.26, + "end": 74.42, + "confidence": 0.998 + }, + { + "text": "be", + "start": 74.42, + "end": 75.16, + "confidence": 0.999 + }, + { + "text": "my", + "start": 75.16, + "end": 75.64, + "confidence": 0.869 + }, + { + "text": "enemy", + "start": 75.64, + "end": 75.92, + "confidence": 0.997 + } + ] + }, + { + "id": 3, + "seek": 6000, + "start": 79.98, + "end": 80.5, + "text": " Look out for yourself My enemy", + "tokens": [ + 2053, + 484, + 337, + 1803, + 1222, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.778, + "words": [ + { + "text": "Look", + "start": 79.98, + "end": 80.12, + "confidence": 0.446 + }, + { + "text": "out", + "start": 80.12, + "end": 80.34, + "confidence": 0.99 + }, + { + "text": "for", + "start": 80.34, + "end": 80.38, + "confidence": 0.991 + }, + { + "text": "yourself", + "start": 80.38, + "end": 80.42, + "confidence": 0.971 + }, + { + "text": "My", + "start": 80.42, + "end": 80.46, + "confidence": 0.533 + }, + { + "text": "enemy", + "start": 80.46, + "end": 80.5, + "confidence": 0.979 + } + ] + }, + { + "id": 4, + "seek": 6000, + "start": 80.5, + "end": 81.28, + "text": " Look out for yourself But I'm ready", + "tokens": [ + 2053, + 484, + 337, + 1803, + 583, + 286, + 478, + 1919 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.943, + "words": [ + { + "text": "Look", + "start": 80.5, + "end": 80.54, + "confidence": 0.844 + }, + { + "text": "out", + "start": 80.54, + 
"end": 80.58, + "confidence": 0.994 + }, + { + "text": "for", + "start": 80.58, + "end": 80.62, + "confidence": 0.999 + }, + { + "text": "yourself", + "start": 80.62, + "end": 80.66, + "confidence": 0.999 + }, + { + "text": "But", + "start": 80.66, + "end": 80.7, + "confidence": 0.767 + }, + { + "text": "I'm", + "start": 80.7, + "end": 81.04, + "confidence": 0.99 + }, + { + "text": "ready", + "start": 81.04, + "end": 81.28, + "confidence": 0.996 + } + ] + }, + { + "id": 5, + "seek": 6000, + "start": 86.14, + "end": 88.28, + "text": " Your words up on the wall You don't need to say", + "tokens": [ + 2260, + 2283, + 493, + 322, + 264, + 2929, + 509, + 500, + 380, + 643, + 281, + 584 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.361, + "words": [ + { + "text": "Your", + "start": 86.14, + "end": 86.4, + "confidence": 0.618 + }, + { + "text": "words", + "start": 86.4, + "end": 86.44, + "confidence": 0.786 + }, + { + "text": "up", + "start": 86.44, + "end": 86.7, + "confidence": 0.891 + }, + { + "text": "on", + "start": 86.7, + "end": 86.98, + "confidence": 0.996 + }, + { + "text": "the", + "start": 86.98, + "end": 87.1, + "confidence": 0.985 + }, + { + "text": "wall", + "start": 87.1, + "end": 87.62, + "confidence": 0.711 + }, + { + "text": "You", + "start": 87.62, + "end": 87.92, + "confidence": 0.084 + }, + { + "text": "don't", + "start": 87.92, + "end": 88.16, + "confidence": 0.172 + }, + { + "text": "need", + "start": 88.16, + "end": 88.2, + "confidence": 0.149 + }, + { + "text": "to", + "start": 88.2, + "end": 88.24, + "confidence": 0.466 + }, + { + "text": "say", + "start": 88.24, + "end": 88.28, + "confidence": 0.094 + } + ] + }, + { + "id": 6, + "seek": 8774, + "start": 88.28, + "end": 94.25, + "text": " I'm ready your words up on the wall as you're praying for my phone and the laughter in the holes and the names that I've", + "tokens": [ + 286, 
+ 478, + 1919, + 428, + 2283, + 493, + 322, + 264, + 2929, + 382, + 291, + 434, + 15611, + 337, + 452, + 2593, + 293, + 264, + 13092, + 294, + 264, + 8118, + 293, + 264, + 5288, + 300, + 286, + 600 + ], + "temperature": 0.4, + "avg_logprob": -0.26142611746060646, + "compression_ratio": 1.486842105263158, + "no_speech_prob": 0.8554685115814209, + "confidence": 0.773, + "words": [ + { + "text": "I'm", + "start": 88.28, + "end": 88.32, + "confidence": 0.459 + }, + { + "text": "ready", + "start": 88.32, + "end": 88.52, + "confidence": 0.976 + }, + { + "text": "your", + "start": 88.52, + "end": 88.9, + "confidence": 0.592 + }, + { + "text": "words", + "start": 88.9, + "end": 89.14, + "confidence": 0.987 + }, + { + "text": "up", + "start": 89.14, + "end": 89.46, + "confidence": 0.974 + }, + { + "text": "on", + "start": 89.46, + "end": 89.6, + "confidence": 0.979 + }, + { + "text": "the", + "start": 89.6, + "end": 89.66, + "confidence": 0.985 + }, + { + "text": "wall", + "start": 89.66, + "end": 90.12, + "confidence": 0.45 + }, + { + "text": "as", + "start": 90.12, + "end": 90.32, + "confidence": 0.489 + }, + { + "text": "you're", + "start": 90.32, + "end": 90.64, + "confidence": 0.817 + }, + { + "text": "praying", + "start": 90.64, + "end": 90.8, + "confidence": 0.962 + }, + { + "text": "for", + "start": 90.8, + "end": 91.06, + "confidence": 0.956 + }, + { + "text": "my", + "start": 91.06, + "end": 91.26, + "confidence": 0.965 + }, + { + "text": "phone", + "start": 91.26, + "end": 91.72, + "confidence": 0.63 + }, + { + "text": "and", + "start": 91.72, + "end": 91.9, + "confidence": 0.687 + }, + { + "text": "the", + "start": 91.9, + "end": 92.0, + "confidence": 0.948 + }, + { + "text": "laughter", + "start": 92.0, + "end": 92.36, + "confidence": 0.87 + }, + { + "text": "in", + "start": 92.36, + "end": 92.72, + "confidence": 0.905 + }, + { + "text": "the", + "start": 92.72, + "end": 92.88, + "confidence": 0.982 + }, + { + "text": "holes", + "start": 92.88, + "end": 93.24, 
+ "confidence": 0.593 + }, + { + "text": "and", + "start": 93.24, + "end": 93.44, + "confidence": 0.6 + }, + { + "text": "the", + "start": 93.44, + "end": 93.52, + "confidence": 0.983 + }, + { + "text": "names", + "start": 93.52, + "end": 93.82, + "confidence": 0.98 + }, + { + "text": "that", + "start": 93.82, + "end": 94.08, + "confidence": 0.827 + }, + { + "text": "I've", + "start": 94.08, + "end": 94.25, + "confidence": 0.75 + } + ] + }, + { + "id": 7, + "seek": 9418, + "start": 94.25, + "end": 97.84, + "text": " I stack it in my mind and I'm waiting for the time", + "tokens": [ + 286, + 8630, + 309, + 294, + 452, + 1575, + 293, + 286, + 478, + 3806, + 337, + 264, + 565 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.764, + "words": [ + { + "text": "I", + "start": 94.25, + "end": 95.04, + "confidence": 0.29 + }, + { + "text": "stack", + "start": 95.04, + "end": 95.38, + "confidence": 0.341 + }, + { + "text": "it", + "start": 95.38, + "end": 95.7, + "confidence": 0.988 + }, + { + "text": "in", + "start": 95.7, + "end": 95.82, + "confidence": 0.987 + }, + { + "text": "my", + "start": 95.82, + "end": 96.02, + "confidence": 0.99 + }, + { + "text": "mind", + "start": 96.02, + "end": 96.42, + "confidence": 0.99 + }, + { + "text": "and", + "start": 96.42, + "end": 96.62, + "confidence": 0.382 + }, + { + "text": "I'm", + "start": 96.62, + "end": 96.72, + "confidence": 0.964 + }, + { + "text": "waiting", + "start": 96.72, + "end": 97.02, + "confidence": 0.952 + }, + { + "text": "for", + "start": 97.02, + "end": 97.34, + "confidence": 0.972 + }, + { + "text": "the", + "start": 97.34, + "end": 97.5, + "confidence": 0.986 + }, + { + "text": "time", + "start": 97.5, + "end": 97.84, + "confidence": 0.985 + } + ] + }, + { + "id": 8, + "seek": 9418, + "start": 98.0, + "end": 101.24, + "text": " When I show you what it's like to be worse than a mind", + "tokens": 
[ + 1133, + 286, + 855, + 291, + 437, + 309, + 311, + 411, + 281, + 312, + 5324, + 813, + 257, + 1575 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.655, + "words": [ + { + "text": "When", + "start": 98.0, + "end": 98.18, + "confidence": 0.768 + }, + { + "text": "I", + "start": 98.18, + "end": 98.22, + "confidence": 0.989 + }, + { + "text": "show", + "start": 98.22, + "end": 98.48, + "confidence": 0.931 + }, + { + "text": "you", + "start": 98.48, + "end": 98.76, + "confidence": 0.988 + }, + { + "text": "what", + "start": 98.76, + "end": 98.94, + "confidence": 0.991 + }, + { + "text": "it's", + "start": 98.94, + "end": 99.42, + "confidence": 0.981 + }, + { + "text": "like", + "start": 99.42, + "end": 99.46, + "confidence": 0.997 + }, + { + "text": "to", + "start": 99.46, + "end": 99.72, + "confidence": 0.681 + }, + { + "text": "be", + "start": 99.72, + "end": 99.78, + "confidence": 0.995 + }, + { + "text": "worse", + "start": 99.78, + "end": 100.04, + "confidence": 0.659 + }, + { + "text": "than", + "start": 100.04, + "end": 100.36, + "confidence": 0.173 + }, + { + "text": "a", + "start": 100.36, + "end": 100.6, + "confidence": 0.112 + }, + { + "text": "mind", + "start": 100.6, + "end": 101.24, + "confidence": 0.463 + } + ] + }, + { + "id": 9, + "seek": 9418, + "start": 102.06, + "end": 106.28, + "text": " Tell you you're the greatest", + "tokens": [ + 5115, + 291, + 291, + 434, + 264, + 6636 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.919, + "words": [ + { + "text": "Tell", + "start": 102.06, + "end": 102.1, + "confidence": 0.861 + }, + { + "text": "you", + "start": 102.1, + "end": 102.72, + "confidence": 0.991 + }, + { + "text": "you're", + "start": 102.72, + "end": 103.66, + "confidence": 0.855 + }, + { + "text": "the", + 
"start": 103.66, + "end": 104.1, + "confidence": 0.985 + }, + { + "text": "greatest", + "start": 104.1, + "end": 106.28, + "confidence": 0.978 + } + ] + }, + { + "id": 10, + "seek": 9418, + "start": 107.14, + "end": 112.98, + "text": " But once you turn they hate us", + "tokens": [ + 583, + 1564, + 291, + 1261, + 436, + 4700, + 505 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.893, + "words": [ + { + "text": "But", + "start": 107.14, + "end": 107.54, + "confidence": 0.976 + }, + { + "text": "once", + "start": 107.54, + "end": 108.38, + "confidence": 0.799 + }, + { + "text": "you", + "start": 108.38, + "end": 108.92, + "confidence": 0.998 + }, + { + "text": "turn", + "start": 108.92, + "end": 109.84, + "confidence": 0.889 + }, + { + "text": "they", + "start": 109.84, + "end": 110.4, + "confidence": 0.662 + }, + { + "text": "hate", + "start": 110.4, + "end": 111.88, + "confidence": 0.99 + }, + { + "text": "us", + "start": 111.88, + "end": 112.98, + "confidence": 0.999 + } + ] + }, + { + "id": 11, + "seek": 9418, + "start": 114.54, + "end": 119.18, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.874, + "words": [ + { + "text": "Oh,", + "start": 114.54, + "end": 115.32, + "confidence": 0.74 + }, + { + "text": "the", + "start": 115.32, + "end": 115.36, + "confidence": 0.97 + }, + { + "text": "misery", + "start": 115.36, + "end": 116.36, + "confidence": 0.999 + }, + { + "text": "Everybody", + "start": 116.36, + "end": 117.52, + "confidence": 0.432 + }, + { + "text": "wants", + "start": 117.52, + "end": 117.9, + "confidence": 0.989 + }, + { + "text": "to", + "start": 117.9, + "end": 118.24, + 
"confidence": 0.998 + }, + { + "text": "be", + "start": 118.24, + "end": 118.28, + "confidence": 0.998 + }, + { + "text": "my", + "start": 118.28, + "end": 118.54, + "confidence": 0.997 + }, + { + "text": "enemy", + "start": 118.54, + "end": 119.18, + "confidence": 0.984 + } + ] + }, + { + "id": 12, + "seek": 12018, + "start": 120.2, + "end": 122.16, + "text": " I smell the sympathy", + "tokens": [ + 286, + 4316, + 264, + 33240 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.755, + "words": [ + { + "text": "I", + "start": 120.2, + "end": 120.54, + "confidence": 0.42 + }, + { + "text": "smell", + "start": 120.54, + "end": 121.36, + "confidence": 0.889 + }, + { + "text": "the", + "start": 121.36, + "end": 121.76, + "confidence": 0.892 + }, + { + "text": "sympathy", + "start": 121.76, + "end": 122.16, + "confidence": 0.979 + } + ] + }, + { + "id": 13, + "seek": 12018, + "start": 122.16, + "end": 126.08, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.912, + "words": [ + { + "text": "Everybody", + "start": 122.16, + "end": 123.72, + "confidence": 0.923 + }, + { + "text": "wants", + "start": 123.72, + "end": 124.1, + "confidence": 0.984 + }, + { + "text": "to", + "start": 124.1, + "end": 124.68, + "confidence": 0.995 + }, + { + "text": "be", + "start": 124.68, + "end": 125.24, + "confidence": 0.996 + }, + { + "text": "my", + "start": 125.24, + "end": 125.5, + "confidence": 0.659 + }, + { + "text": "enemy", + "start": 125.5, + "end": 126.08, + "confidence": 0.97 + } + ] + }, + { + "id": 14, + "seek": 12018, + "start": 129.9, + "end": 130.86, + "text": " Look out for yourself", + "tokens": [ + 2053, + 484, + 337, + 1803 + ], + 
"temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.731, + "words": [ + { + "text": "Look", + "start": 129.9, + "end": 130.04, + "confidence": 0.313 + }, + { + "text": "out", + "start": 130.04, + "end": 130.3, + "confidence": 0.932 + }, + { + "text": "for", + "start": 130.3, + "end": 130.44, + "confidence": 0.991 + }, + { + "text": "yourself", + "start": 130.44, + "end": 130.86, + "confidence": 0.986 + } + ] + }, + { + "id": 15, + "seek": 12018, + "start": 131.52, + "end": 132.54, + "text": " My enemy", + "tokens": [ + 1222, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.937, + "words": [ + { + "text": "My", + "start": 131.52, + "end": 131.74, + "confidence": 0.891 + }, + { + "text": "enemy", + "start": 131.74, + "end": 132.54, + "confidence": 0.985 + } + ] + }, + { + "id": 16, + "seek": 12018, + "start": 136.06, + "end": 137.14, + "text": " Look out for yourself", + "tokens": [ + 2053, + 484, + 337, + 1803 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.991, + "words": [ + { + "text": "Look", + "start": 136.06, + "end": 136.18, + "confidence": 0.974 + }, + { + "text": "out", + "start": 136.18, + "end": 136.54, + "confidence": 0.994 + }, + { + "text": "for", + "start": 136.54, + "end": 136.74, + "confidence": 0.999 + }, + { + "text": "yourself", + "start": 136.74, + "end": 137.14, + "confidence": 0.999 + } + ] + }, + { + "id": 17, + "seek": 12018, + "start": 137.66, + "end": 140.18, + "text": " Look, okay I'm hoping that somebody pray for me", + "tokens": [ + 2053, + 11, + 1392, + 286, + 478, + 7159, + 300, + 2618, + 3690, + 337, + 385 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + 
"compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.792, + "words": [ + { + "text": "Look,", + "start": 137.66, + "end": 138.46, + "confidence": 0.849 + }, + { + "text": "okay", + "start": 138.46, + "end": 138.6, + "confidence": 0.646 + }, + { + "text": "I'm", + "start": 138.6, + "end": 138.86, + "confidence": 0.658 + }, + { + "text": "hoping", + "start": 138.86, + "end": 139.12, + "confidence": 0.576 + }, + { + "text": "that", + "start": 139.12, + "end": 139.32, + "confidence": 0.958 + }, + { + "text": "somebody", + "start": 139.32, + "end": 139.52, + "confidence": 0.975 + }, + { + "text": "pray", + "start": 139.52, + "end": 139.8, + "confidence": 0.85 + }, + { + "text": "for", + "start": 139.8, + "end": 140.02, + "confidence": 0.899 + }, + { + "text": "me", + "start": 140.02, + "end": 140.18, + "confidence": 0.999 + } + ] + }, + { + "id": 18, + "seek": 12018, + "start": 140.38, + "end": 141.64, + "text": " I'm praying that somebody vote for me", + "tokens": [ + 286, + 478, + 15611, + 300, + 2618, + 4740, + 337, + 385 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.821, + "words": [ + { + "text": "I'm", + "start": 140.38, + "end": 140.58, + "confidence": 0.756 + }, + { + "text": "praying", + "start": 140.58, + "end": 140.62, + "confidence": 0.956 + }, + { + "text": "that", + "start": 140.62, + "end": 140.82, + "confidence": 0.959 + }, + { + "text": "somebody", + "start": 140.82, + "end": 141.06, + "confidence": 0.995 + }, + { + "text": "vote", + "start": 141.06, + "end": 141.32, + "confidence": 0.396 + }, + { + "text": "for", + "start": 141.32, + "end": 141.52, + "confidence": 0.999 + }, + { + "text": "me", + "start": 141.52, + "end": 141.64, + "confidence": 1.0 + } + ] + }, + { + "id": 19, + "seek": 12018, + "start": 141.72, + "end": 143.2, + "text": " I'm staying where nobody's supposed to be", + 
"tokens": [ + 286, + 478, + 7939, + 689, + 5079, + 311, + 3442, + 281, + 312 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.893, + "words": [ + { + "text": "I'm", + "start": 141.72, + "end": 141.9, + "confidence": 0.937 + }, + { + "text": "staying", + "start": 141.9, + "end": 142.12, + "confidence": 0.82 + }, + { + "text": "where", + "start": 142.12, + "end": 142.3, + "confidence": 0.976 + }, + { + "text": "nobody's", + "start": 142.3, + "end": 142.88, + "confidence": 0.735 + }, + { + "text": "supposed", + "start": 142.88, + "end": 142.92, + "confidence": 0.957 + }, + { + "text": "to", + "start": 142.92, + "end": 143.12, + "confidence": 0.998 + }, + { + "text": "be", + "start": 143.12, + "end": 143.2, + "confidence": 0.999 + } + ] + }, + { + "id": 20, + "seek": 12018, + "start": 143.2, + "end": 144.86, + "text": " I propose to be in a rink of emotions", + "tokens": [ + 286, + 17421, + 281, + 312, + 294, + 257, + 367, + 475, + 295, + 8462 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.554, + "words": [ + { + "text": "I", + "start": 143.2, + "end": 143.36, + "confidence": 0.354 + }, + { + "text": "propose", + "start": 143.36, + "end": 143.64, + "confidence": 0.323 + }, + { + "text": "to", + "start": 143.64, + "end": 143.86, + "confidence": 0.856 + }, + { + "text": "be", + "start": 143.86, + "end": 144.04, + "confidence": 0.579 + }, + { + "text": "in", + "start": 144.04, + "end": 144.18, + "confidence": 0.837 + }, + { + "text": "a", + "start": 144.18, + "end": 144.24, + "confidence": 0.631 + }, + { + "text": "rink", + "start": 144.24, + "end": 144.42, + "confidence": 0.309 + }, + { + "text": "of", + "start": 144.42, + "end": 144.54, + "confidence": 0.996 + }, + { + "text": "emotions", + "start": 144.54, + "end": 144.86, + 
"confidence": 0.952 + } + ] + }, + { + "id": 21, + "seek": 12018, + "start": 145.06, + "end": 146.28, + "text": " Ready to go whenever you let me know", + "tokens": [ + 9944, + 281, + 352, + 5699, + 291, + 718, + 385, + 458 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.902, + "words": [ + { + "text": "Ready", + "start": 145.06, + "end": 145.26, + "confidence": 0.938 + }, + { + "text": "to", + "start": 145.26, + "end": 145.5, + "confidence": 0.997 + }, + { + "text": "go", + "start": 145.5, + "end": 145.58, + "confidence": 0.996 + }, + { + "text": "whenever", + "start": 145.58, + "end": 145.74, + "confidence": 0.85 + }, + { + "text": "you", + "start": 145.74, + "end": 145.92, + "confidence": 0.575 + }, + { + "text": "let", + "start": 145.92, + "end": 146.04, + "confidence": 0.97 + }, + { + "text": "me", + "start": 146.04, + "end": 146.16, + "confidence": 0.998 + }, + { + "text": "know", + "start": 146.16, + "end": 146.28, + "confidence": 0.992 + } + ] + }, + { + "id": 22, + "seek": 12018, + "start": 146.28, + "end": 147.84, + "text": " The road is long so put the pedal into the flow", + "tokens": [ + 440, + 3060, + 307, + 938, + 370, + 829, + 264, + 19122, + 666, + 264, + 3095 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.944, + "words": [ + { + "text": "The", + "start": 146.28, + "end": 146.5, + "confidence": 0.977 + }, + { + "text": "road", + "start": 146.5, + "end": 146.64, + "confidence": 0.989 + }, + { + "text": "is", + "start": 146.64, + "end": 146.74, + "confidence": 0.977 + }, + { + "text": "long", + "start": 146.74, + "end": 146.9, + "confidence": 0.936 + }, + { + "text": "so", + "start": 146.9, + "end": 147.04, + "confidence": 0.811 + }, + { + "text": "put", + "start": 147.04, + "end": 147.16, + "confidence": 0.94 + }, + 
{ + "text": "the", + "start": 147.16, + "end": 147.3, + "confidence": 0.969 + }, + { + "text": "pedal", + "start": 147.3, + "end": 147.4, + "confidence": 0.947 + }, + { + "text": "into", + "start": 147.4, + "end": 147.58, + "confidence": 0.955 + }, + { + "text": "the", + "start": 147.58, + "end": 147.76, + "confidence": 0.993 + }, + { + "text": "flow", + "start": 147.76, + "end": 147.84, + "confidence": 0.902 + } + ] + }, + { + "id": 23, + "seek": 12018, + "start": 147.96, + "end": 149.78, + "text": " The enemy on my trail, my energy unavailable", + "tokens": [ + 440, + 5945, + 322, + 452, + 9924, + 11, + 452, + 2281, + 36541, + 32699 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.866, + "words": [ + { + "text": "The", + "start": 147.96, + "end": 148.08, + "confidence": 0.714 + }, + { + "text": "enemy", + "start": 148.08, + "end": 148.24, + "confidence": 0.473 + }, + { + "text": "on", + "start": 148.24, + "end": 148.44, + "confidence": 0.969 + }, + { + "text": "my", + "start": 148.44, + "end": 148.54, + "confidence": 0.977 + }, + { + "text": "trail,", + "start": 148.54, + "end": 148.76, + "confidence": 0.993 + }, + { + "text": "my", + "start": 148.76, + "end": 148.8, + "confidence": 0.988 + }, + { + "text": "energy", + "start": 148.8, + "end": 149.06, + "confidence": 0.993 + }, + { + "text": "unavailable", + "start": 149.06, + "end": 149.78, + "confidence": 0.935 + } + ] + }, + { + "id": 24, + "seek": 14968, + "start": 150.0, + "end": 151.16, + "text": " I'm a tell a monster the way go", + "tokens": [ + 286, + 478, + 257, + 980, + 257, + 10090, + 264, + 636, + 352 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.443, + "words": [ + { + "text": "I'm", + "start": 150.0, + "end": 150.18, + "confidence": 0.73 + }, + { + "text": "a", + 
"start": 150.18, + "end": 150.28, + "confidence": 0.203 + }, + { + "text": "tell", + "start": 150.28, + "end": 150.34, + "confidence": 0.507 + }, + { + "text": "a", + "start": 150.34, + "end": 150.46, + "confidence": 0.137 + }, + { + "text": "monster", + "start": 150.46, + "end": 150.66, + "confidence": 0.238 + }, + { + "text": "the", + "start": 150.66, + "end": 150.9, + "confidence": 0.585 + }, + { + "text": "way", + "start": 150.9, + "end": 150.98, + "confidence": 0.953 + }, + { + "text": "go", + "start": 150.98, + "end": 151.16, + "confidence": 0.664 + } + ] + }, + { + "id": 25, + "seek": 14968, + "start": 151.2, + "end": 152.54, + "text": " Way when the plot on my track to the top", + "tokens": [ + 9558, + 562, + 264, + 7542, + 322, + 452, + 2837, + 281, + 264, + 1192 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.517, + "words": [ + { + "text": "Way", + "start": 151.2, + "end": 151.38, + "confidence": 0.102 + }, + { + "text": "when", + "start": 151.38, + "end": 151.54, + "confidence": 0.434 + }, + { + "text": "the", + "start": 151.54, + "end": 151.66, + "confidence": 0.263 + }, + { + "text": "plot", + "start": 151.66, + "end": 151.78, + "confidence": 0.627 + }, + { + "text": "on", + "start": 151.78, + "end": 151.92, + "confidence": 0.605 + }, + { + "text": "my", + "start": 151.92, + "end": 152.0, + "confidence": 0.975 + }, + { + "text": "track", + "start": 152.0, + "end": 152.14, + "confidence": 0.338 + }, + { + "text": "to", + "start": 152.14, + "end": 152.32, + "confidence": 0.946 + }, + { + "text": "the", + "start": 152.32, + "end": 152.4, + "confidence": 0.991 + }, + { + "text": "top", + "start": 152.4, + "end": 152.54, + "confidence": 0.993 + } + ] + }, + { + "id": 26, + "seek": 14968, + "start": 152.54, + "end": 154.32, + "text": " I been out of shape thinking that I'm a box I'm an astronaut", + "tokens": [ + 286, + 668, + 484, + 295, + 
3909, + 1953, + 300, + 286, + 478, + 257, + 2424, + 286, + 478, + 364, + 18516 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.667, + "words": [ + { + "text": "I", + "start": 152.54, + "end": 152.7, + "confidence": 0.972 + }, + { + "text": "been", + "start": 152.7, + "end": 152.76, + "confidence": 0.691 + }, + { + "text": "out", + "start": 152.76, + "end": 152.9, + "confidence": 0.72 + }, + { + "text": "of", + "start": 152.9, + "end": 153.0, + "confidence": 0.959 + }, + { + "text": "shape", + "start": 153.0, + "end": 153.1, + "confidence": 0.998 + }, + { + "text": "thinking", + "start": 153.1, + "end": 153.3, + "confidence": 0.385 + }, + { + "text": "that", + "start": 153.3, + "end": 153.5, + "confidence": 0.353 + }, + { + "text": "I'm", + "start": 153.5, + "end": 153.66, + "confidence": 0.572 + }, + { + "text": "a", + "start": 153.66, + "end": 153.7, + "confidence": 0.703 + }, + { + "text": "box", + "start": 153.7, + "end": 153.74, + "confidence": 0.874 + }, + { + "text": "I'm", + "start": 153.74, + "end": 153.92, + "confidence": 0.451 + }, + { + "text": "an", + "start": 153.92, + "end": 153.96, + "confidence": 0.976 + }, + { + "text": "astronaut", + "start": 153.96, + "end": 154.32, + "confidence": 0.926 + } + ] + }, + { + "id": 27, + "seek": 14968, + "start": 154.52, + "end": 156.34, + "text": " Blast it off the planet rock the cause catastrophe", + "tokens": [ + 2177, + 525, + 309, + 766, + 264, + 5054, + 3727, + 264, + 3082, + 36043 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.583, + "words": [ + { + "text": "Blast", + "start": 154.52, + "end": 154.76, + "confidence": 0.476 + }, + { + "text": "it", + "start": 154.76, + "end": 154.9, + "confidence": 0.954 + }, + { + "text": "off", + "start": 154.9, + "end": 155.02, + 
"confidence": 0.984 + }, + { + "text": "the", + "start": 155.02, + "end": 155.2, + "confidence": 0.956 + }, + { + "text": "planet", + "start": 155.2, + "end": 155.34, + "confidence": 0.99 + }, + { + "text": "rock", + "start": 155.34, + "end": 155.52, + "confidence": 0.24 + }, + { + "text": "the", + "start": 155.52, + "end": 155.68, + "confidence": 0.301 + }, + { + "text": "cause", + "start": 155.68, + "end": 155.88, + "confidence": 0.701 + }, + { + "text": "catastrophe", + "start": 155.88, + "end": 156.34, + "confidence": 0.444 + } + ] + }, + { + "id": 28, + "seek": 14968, + "start": 156.44, + "end": 158.08, + "text": " And it matters more because ahead and not ahead", + "tokens": [ + 400, + 309, + 7001, + 544, + 570, + 2286, + 293, + 406, + 2286 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.721, + "words": [ + { + "text": "And", + "start": 156.44, + "end": 156.64, + "confidence": 0.828 + }, + { + "text": "it", + "start": 156.64, + "end": 156.78, + "confidence": 0.715 + }, + { + "text": "matters", + "start": 156.78, + "end": 156.94, + "confidence": 0.994 + }, + { + "text": "more", + "start": 156.94, + "end": 157.16, + "confidence": 0.981 + }, + { + "text": "because", + "start": 157.16, + "end": 157.38, + "confidence": 0.909 + }, + { + "text": "ahead", + "start": 157.38, + "end": 157.64, + "confidence": 0.432 + }, + { + "text": "and", + "start": 157.64, + "end": 157.82, + "confidence": 0.616 + }, + { + "text": "not", + "start": 157.82, + "end": 157.94, + "confidence": 0.716 + }, + { + "text": "ahead", + "start": 157.94, + "end": 158.08, + "confidence": 0.524 + } + ] + }, + { + "id": 29, + "seek": 14968, + "start": 158.1, + "end": 159.74, + "text": " I thought about wreaking havoc on an opposition", + "tokens": [ + 286, + 1194, + 466, + 46674, + 2456, + 47367, + 322, + 364, + 13504 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + 
"compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.963, + "words": [ + { + "text": "I", + "start": 158.1, + "end": 158.24, + "confidence": 0.986 + }, + { + "text": "thought", + "start": 158.24, + "end": 158.36, + "confidence": 0.977 + }, + { + "text": "about", + "start": 158.36, + "end": 158.58, + "confidence": 0.975 + }, + { + "text": "wreaking", + "start": 158.58, + "end": 158.84, + "confidence": 0.944 + }, + { + "text": "havoc", + "start": 158.84, + "end": 159.08, + "confidence": 1.0 + }, + { + "text": "on", + "start": 159.08, + "end": 159.3, + "confidence": 0.944 + }, + { + "text": "an", + "start": 159.3, + "end": 159.44, + "confidence": 0.913 + }, + { + "text": "opposition", + "start": 159.44, + "end": 159.74, + "confidence": 0.987 + } + ] + }, + { + "id": 30, + "seek": 14968, + "start": 159.88, + "end": 161.52, + "text": " Kinda shockin' they want it static with precision", + "tokens": [ + 35553, + 5588, + 259, + 6, + 436, + 528, + 309, + 13437, + 365, + 18356 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.537, + "words": [ + { + "text": "Kinda", + "start": 159.88, + "end": 160.1, + "confidence": 0.574 + }, + { + "text": "shockin'", + "start": 160.1, + "end": 160.5, + "confidence": 0.408 + }, + { + "text": "they", + "start": 160.5, + "end": 160.58, + "confidence": 0.371 + }, + { + "text": "want", + "start": 160.58, + "end": 160.74, + "confidence": 0.454 + }, + { + "text": "it", + "start": 160.74, + "end": 160.84, + "confidence": 0.577 + }, + { + "text": "static", + "start": 160.84, + "end": 161.02, + "confidence": 0.993 + }, + { + "text": "with", + "start": 161.02, + "end": 161.22, + "confidence": 0.609 + }, + { + "text": "precision", + "start": 161.22, + "end": 161.52, + "confidence": 0.877 + } + ] + }, + { + "id": 31, + "seek": 14968, + "start": 161.62, + "end": 163.36, + "text": " I'm 
automatic quarterback I ain't talkin' second", + "tokens": [ + 286, + 478, + 12509, + 31952, + 286, + 7862, + 380, + 39243, + 6, + 1150 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.735, + "words": [ + { + "text": "I'm", + "start": 161.62, + "end": 161.78, + "confidence": 0.935 + }, + { + "text": "automatic", + "start": 161.78, + "end": 162.18, + "confidence": 0.975 + }, + { + "text": "quarterback", + "start": 162.18, + "end": 162.58, + "confidence": 0.419 + }, + { + "text": "I", + "start": 162.58, + "end": 162.8, + "confidence": 0.405 + }, + { + "text": "ain't", + "start": 162.8, + "end": 162.88, + "confidence": 0.968 + }, + { + "text": "talkin'", + "start": 162.88, + "end": 163.18, + "confidence": 0.783 + }, + { + "text": "second", + "start": 163.18, + "end": 163.36, + "confidence": 0.556 + } + ] + }, + { + "id": 32, + "seek": 14968, + "start": 163.4, + "end": 164.58, + "text": " Pack it pack it up I don't panic", + "tokens": [ + 18466, + 309, + 2844, + 309, + 493, + 286, + 500, + 380, + 14783 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.786, + "words": [ + { + "text": "Pack", + "start": 163.4, + "end": 163.6, + "confidence": 0.931 + }, + { + "text": "it", + "start": 163.6, + "end": 163.66, + "confidence": 0.88 + }, + { + "text": "pack", + "start": 163.66, + "end": 163.82, + "confidence": 0.298 + }, + { + "text": "it", + "start": 163.82, + "end": 164.04, + "confidence": 0.994 + }, + { + "text": "up", + "start": 164.04, + "end": 164.1, + "confidence": 0.977 + }, + { + "text": "I", + "start": 164.1, + "end": 164.2, + "confidence": 0.637 + }, + { + "text": "don't", + "start": 164.2, + "end": 164.54, + "confidence": 0.87 + }, + { + "text": "panic", + "start": 164.54, + "end": 164.58, + "confidence": 0.997 + } + ] + }, + { + "id": 
33, + "seek": 14968, + "start": 164.58, + "end": 165.7, + "text": " Better batter up who the baddest", + "tokens": [ + 15753, + 4220, + 493, + 567, + 264, + 1578, + 23748 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.703, + "words": [ + { + "text": "Better", + "start": 164.58, + "end": 164.82, + "confidence": 0.627 + }, + { + "text": "batter", + "start": 164.82, + "end": 165.1, + "confidence": 0.265 + }, + { + "text": "up", + "start": 165.1, + "end": 165.24, + "confidence": 0.963 + }, + { + "text": "who", + "start": 165.24, + "end": 165.4, + "confidence": 0.861 + }, + { + "text": "the", + "start": 165.4, + "end": 165.52, + "confidence": 0.973 + }, + { + "text": "baddest", + "start": 165.52, + "end": 165.7, + "confidence": 0.797 + } + ] + }, + { + "id": 34, + "seek": 14968, + "start": 165.7, + "end": 166.74, + "text": " It don't matter cause we it's your", + "tokens": [ + 467, + 500, + 380, + 1871, + 3082, + 321, + 309, + 311, + 428 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.727, + "words": [ + { + "text": "It", + "start": 165.7, + "end": 165.9, + "confidence": 0.979 + }, + { + "text": "don't", + "start": 165.9, + "end": 166.14, + "confidence": 0.995 + }, + { + "text": "matter", + "start": 166.14, + "end": 166.18, + "confidence": 0.998 + }, + { + "text": "cause", + "start": 166.18, + "end": 166.4, + "confidence": 0.588 + }, + { + "text": "we", + "start": 166.4, + "end": 166.52, + "confidence": 0.973 + }, + { + "text": "it's", + "start": 166.52, + "end": 166.7, + "confidence": 0.404 + }, + { + "text": "your", + "start": 166.7, + "end": 166.74, + "confidence": 0.632 + } + ] + }, + { + "id": 35, + "seek": 14968, + "start": 166.8, + "end": 169.52, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 
312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.966, + "words": [ + { + "text": "Everybody", + "start": 166.8, + "end": 167.34, + "confidence": 0.896 + }, + { + "text": "wants", + "start": 167.34, + "end": 167.74, + "confidence": 0.97 + }, + { + "text": "to", + "start": 167.74, + "end": 167.94, + "confidence": 0.996 + }, + { + "text": "be", + "start": 167.94, + "end": 168.14, + "confidence": 0.996 + }, + { + "text": "my", + "start": 168.14, + "end": 168.46, + "confidence": 0.991 + }, + { + "text": "enemy", + "start": 168.46, + "end": 169.52, + "confidence": 0.95 + } + ] + }, + { + "id": 36, + "seek": 14968, + "start": 170.66, + "end": 172.2, + "text": " Spare the sympathy", + "tokens": [ + 1738, + 543, + 264, + 33240 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.687, + "words": [ + { + "text": "Spare", + "start": 170.66, + "end": 171.22, + "confidence": 0.494 + }, + { + "text": "the", + "start": 171.22, + "end": 171.46, + "confidence": 0.976 + }, + { + "text": "sympathy", + "start": 171.46, + "end": 172.2, + "confidence": 0.937 + } + ] + }, + { + "id": 37, + "seek": 14968, + "start": 172.96, + "end": 176.18, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.983, + "words": [ + { + "text": "Everybody", + "start": 172.96, + "end": 173.62, + "confidence": 0.994 + }, + { + "text": "wants", + "start": 173.62, + "end": 174.02, + "confidence": 0.99 + }, + { + "text": "to", + "start": 174.02, + "end": 174.36, + "confidence": 0.999 + }, + { + "text": "be", + "start": 174.36, + "end": 175.0, + "confidence": 
0.999 + }, + { + "text": "my", + "start": 175.0, + "end": 175.4, + "confidence": 0.924 + }, + { + "text": "enemy", + "start": 175.4, + "end": 176.18, + "confidence": 0.996 + } + ] + }, + { + "id": 38, + "seek": 14968, + "start": 176.84, + "end": 178.44, + "text": " Oh the misery", + "tokens": [ + 876, + 264, + 32309 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.813, + "words": [ + { + "text": "Oh", + "start": 176.84, + "end": 177.48, + "confidence": 0.677 + }, + { + "text": "the", + "start": 177.48, + "end": 177.66, + "confidence": 0.794 + }, + { + "text": "misery", + "start": 177.66, + "end": 178.44, + "confidence": 0.998 + } + ] + }, + { + "id": 39, + "seek": 17906, + "start": 179.28, + "end": 181.44, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.955, + "words": [ + { + "text": "Everybody", + "start": 179.28, + "end": 179.84, + "confidence": 0.802 + }, + { + "text": "wants", + "start": 179.84, + "end": 180.22, + "confidence": 0.984 + }, + { + "text": "to", + "start": 180.22, + "end": 180.44, + "confidence": 0.997 + }, + { + "text": "be", + "start": 180.44, + "end": 180.58, + "confidence": 0.998 + }, + { + "text": "my", + "start": 180.58, + "end": 180.86, + "confidence": 0.992 + }, + { + "text": "enemy", + "start": 180.86, + "end": 181.44, + "confidence": 0.976 + } + ] + }, + { + "id": 40, + "seek": 17906, + "start": 183.12, + "end": 184.52, + "text": " Spare the sympathy", + "tokens": [ + 1738, + 543, + 264, + 33240 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.678, + "words": [ + { + "text": "Spare", + "start": 183.12, + "end": 183.7, 
+ "confidence": 0.479 + }, + { + "text": "the", + "start": 183.7, + "end": 183.92, + "confidence": 0.934 + }, + { + "text": "sympathy", + "start": 183.92, + "end": 184.52, + "confidence": 0.986 + } + ] + }, + { + "id": 41, + "seek": 17906, + "start": 185.56, + "end": 188.46, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.959, + "words": [ + { + "text": "Everybody", + "start": 185.56, + "end": 186.08, + "confidence": 0.993 + }, + { + "text": "wants", + "start": 186.08, + "end": 186.46, + "confidence": 0.99 + }, + { + "text": "to", + "start": 186.46, + "end": 186.72, + "confidence": 0.999 + }, + { + "text": "be", + "start": 186.72, + "end": 187.22, + "confidence": 0.999 + }, + { + "text": "my", + "start": 187.22, + "end": 187.86, + "confidence": 0.798 + }, + { + "text": "enemy", + "start": 187.86, + "end": 188.46, + "confidence": 0.991 + } + ] + }, + { + "id": 42, + "seek": 17906, + "start": 190.1, + "end": 191.96, + "text": " I swear, I swear I'll never be a saint", + "tokens": [ + 286, + 11902, + 11, + 286, + 11902, + 286, + 603, + 1128, + 312, + 257, + 28374 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.662, + "words": [ + { + "text": "I", + "start": 190.1, + "end": 190.14, + "confidence": 0.107 + }, + { + "text": "swear,", + "start": 190.14, + "end": 190.44, + "confidence": 0.908 + }, + { + "text": "I", + "start": 190.44, + "end": 190.48, + "confidence": 0.962 + }, + { + "text": "swear", + "start": 190.48, + "end": 190.52, + "confidence": 0.867 + }, + { + "text": "I'll", + "start": 190.52, + "end": 191.24, + "confidence": 0.628 + }, + { + "text": "never", + "start": 191.24, + "end": 191.28, + "confidence": 0.987 + }, + { + "text": "be", + "start": 191.28, 
+ "end": 191.56, + "confidence": 0.993 + }, + { + "text": "a", + "start": 191.56, + "end": 191.76, + "confidence": 0.602 + }, + { + "text": "saint", + "start": 191.76, + "end": 191.96, + "confidence": 0.858 + } + ] + }, + { + "id": 43, + "seek": 17906, + "start": 192.12, + "end": 194.66, + "text": " I swear, my enemy", + "tokens": [ + 286, + 11902, + 11, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.529, + "words": [ + { + "text": "I", + "start": 192.12, + "end": 192.62, + "confidence": 0.35 + }, + { + "text": "swear,", + "start": 192.62, + "end": 193.88, + "confidence": 0.83 + }, + { + "text": "my", + "start": 193.88, + "end": 194.02, + "confidence": 0.279 + }, + { + "text": "enemy", + "start": 194.02, + "end": 194.66, + "confidence": 0.964 + } + ] + }, + { + "id": 44, + "seek": 17906, + "start": 196.34, + "end": 198.36, + "text": " I swear, I swear I'll never be a saint", + "tokens": [ + 286, + 11902, + 11, + 286, + 11902, + 286, + 603, + 1128, + 312, + 257, + 28374 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.976, + "words": [ + { + "text": "I", + "start": 196.34, + "end": 196.38, + "confidence": 0.945 + }, + { + "text": "swear,", + "start": 196.38, + "end": 196.56, + "confidence": 0.999 + }, + { + "text": "I", + "start": 196.56, + "end": 196.6, + "confidence": 0.875 + }, + { + "text": "swear", + "start": 196.6, + "end": 196.84, + "confidence": 0.995 + }, + { + "text": "I'll", + "start": 196.84, + "end": 197.36, + "confidence": 0.982 + }, + { + "text": "never", + "start": 197.36, + "end": 197.5, + "confidence": 0.999 + }, + { + "text": "be", + "start": 197.5, + "end": 197.8, + "confidence": 0.999 + }, + { + "text": "a", + "start": 197.8, + "end": 198.04, + "confidence": 0.997 + }, + { + "text": "saint", + "start": 198.04, + "end": 198.36, 
+ "confidence": 0.994 + } + ] + }, + { + "id": 45, + "seek": 19822, + "start": 198.44, + "end": 200.22, + "text": " You got to be yourself!", + "tokens": [ + 50364, + 509, + 658, + 281, + 312, + 1803, + 0, + 50464 + ], + "temperature": 0.4, + "avg_logprob": -0.8683164384629991, + "compression_ratio": 0.7419354838709677, + "no_speech_prob": 0.6954998970031738, + "confidence": 0.365, + "words": [ + { + "text": "You", + "start": 198.44, + "end": 198.62, + "confidence": 0.054 + }, + { + "text": "got", + "start": 198.62, + "end": 198.8, + "confidence": 0.163 + }, + { + "text": "to", + "start": 198.8, + "end": 198.98, + "confidence": 0.918 + }, + { + "text": "be", + "start": 198.98, + "end": 199.06, + "confidence": 0.87 + }, + { + "text": "yourself!", + "start": 199.06, + "end": 200.22, + "confidence": 0.913 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/nocond_music.mp4.words.json b/tests/expected/corner_cases.cpu/nocond_music.mp4.words.json new file mode 100644 index 0000000000000000000000000000000000000000..ddd3ddeb2eb18ea98bb99dffe65159d92a046d85 --- /dev/null +++ b/tests/expected/corner_cases.cpu/nocond_music.mp4.words.json @@ -0,0 +1,2687 @@ +{ + "text": " I Oh, the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself But I'm ready Your words up on the wall as you're praying for my phone And the laughter in the holes and the names that I've been called I stack it in my mind and I'm waiting for the time When I show you what it's like to be worse but in the mind Tell you you're the greatest But once you turn they hate us Oh, the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself Look, okay I'm hoping that somebody pray for me I'm praying that somebody hold for me. I'm staying where nobody's supposed to be. 
I propose to be in a wreck of emotions. Ready to go whenever you let me know. The road is long, so put the pedal into the flow. The enemy on my trail, my energy unavailable. I'ma tell them I said away, go away. When I'm plotting, I'ma drive to the top. I've been out of shape, thinking out of the box. I'm an astronaut, blasted off the planet. Rock the cars, catastrophic, and it matters more because I had it now. Had I thought about wreaking havoc on an opposition. Kind of shocking, they want it static. With precision, I'm automatic. Quarterback, I ain't talking second. Pack it, pack it up. I don't panic, better, better. Up who the baddest. it don't matter cause we is your enemy. I swear I'll never be insane You got to be yourself", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.02, + "end": 0.4, + "text": " I", + "tokens": [ + 50364, + 286, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.9367842674255371, + "compression_ratio": 0.1111111111111111, + "no_speech_prob": 0.7794302701950073, + "confidence": 0.032, + "words": [ + { + "text": "I", + "start": 0.02, + "end": 0.4, + "confidence": 0.032 + } + ] + }, + { + "id": 1, + "seek": 6000, + "start": 60.02, + "end": 69.26, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.45698386972600763, + "compression_ratio": 1.62, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.669, + "words": [ + { + "text": "Oh,", + "start": 60.02, + "end": 65.44, + "confidence": 0.084 + }, + { + "text": "the", + "start": 65.44, + "end": 65.48, + "confidence": 0.796 + }, + { + "text": "misery", + "start": 65.48, + "end": 66.08, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 66.08, + "end": 67.62, + "confidence": 0.431 + }, + { + "text": "wants", + "start": 67.62, + "end": 68.0, + "confidence": 0.983 + }, + { + "text": "to", + "start": 68.0, + "end": 68.2, + 
"confidence": 0.993 + }, + { + "text": "be", + "start": 68.2, + "end": 68.4, + "confidence": 0.996 + }, + { + "text": "my", + "start": 68.4, + "end": 68.74, + "confidence": 0.984 + }, + { + "text": "enemy", + "start": 68.74, + "end": 69.26, + "confidence": 0.984 + } + ] + }, + { + "id": 2, + "seek": 6000, + "start": 70.9, + "end": 76.38, + "text": " Spare the sympathy Everybody wants to be my enemy", + "tokens": [ + 1738, + 543, + 264, + 33240, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.45698386972600763, + "compression_ratio": 1.62, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.934, + "words": [ + { + "text": "Spare", + "start": 70.9, + "end": 71.4, + "confidence": 0.812 + }, + { + "text": "the", + "start": 71.4, + "end": 71.74, + "confidence": 0.968 + }, + { + "text": "sympathy", + "start": 71.74, + "end": 72.58, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 72.58, + "end": 73.88, + "confidence": 0.962 + }, + { + "text": "wants", + "start": 73.88, + "end": 74.24, + "confidence": 0.994 + }, + { + "text": "to", + "start": 74.24, + "end": 74.42, + "confidence": 0.999 + }, + { + "text": "be", + "start": 74.42, + "end": 75.36, + "confidence": 0.999 + }, + { + "text": "my", + "start": 75.36, + "end": 75.68, + "confidence": 0.837 + }, + { + "text": "enemy", + "start": 75.68, + "end": 76.38, + "confidence": 0.997 + } + ] + }, + { + "id": 3, + "seek": 6000, + "start": 79.94, + "end": 82.58, + "text": " Look out for yourself My enemy", + "tokens": [ + 2053, + 484, + 337, + 1803, + 1222, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.45698386972600763, + "compression_ratio": 1.62, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.774, + "words": [ + { + "text": "Look", + "start": 79.94, + "end": 80.14, + "confidence": 0.43 + }, + { + "text": "out", + "start": 80.14, + "end": 80.46, + "confidence": 0.991 + }, + { + "text": "for", + "start": 80.46, + "end": 80.6, + "confidence": 
0.989 + }, + { + "text": "yourself", + "start": 80.6, + "end": 81.24, + "confidence": 0.97 + }, + { + "text": "My", + "start": 81.24, + "end": 81.88, + "confidence": 0.536 + }, + { + "text": "enemy", + "start": 81.88, + "end": 82.58, + "confidence": 0.98 + } + ] + }, + { + "id": 4, + "seek": 6000, + "start": 86.18, + "end": 88.46, + "text": " Look out for yourself But I'm ready", + "tokens": [ + 2053, + 484, + 337, + 1803, + 583, + 286, + 478, + 1919 + ], + "temperature": 0.0, + "avg_logprob": -0.45698386972600763, + "compression_ratio": 1.62, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.927, + "words": [ + { + "text": "Look", + "start": 86.18, + "end": 86.4, + "confidence": 0.747 + }, + { + "text": "out", + "start": 86.4, + "end": 86.64, + "confidence": 0.988 + }, + { + "text": "for", + "start": 86.64, + "end": 86.82, + "confidence": 0.998 + }, + { + "text": "yourself", + "start": 86.82, + "end": 87.62, + "confidence": 0.999 + }, + { + "text": "But", + "start": 87.62, + "end": 87.88, + "confidence": 0.764 + }, + { + "text": "I'm", + "start": 87.88, + "end": 88.12, + "confidence": 0.989 + }, + { + "text": "ready", + "start": 88.12, + "end": 88.46, + "confidence": 0.995 + } + ] + }, + { + "id": 5, + "seek": 8856, + "start": 88.58, + "end": 91.58, + "text": " Your words up on the wall as you're praying for my phone", + "tokens": [ + 2260, + 2283, + 493, + 322, + 264, + 2929, + 382, + 291, + 434, + 15611, + 337, + 452, + 2593 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.82, + "words": [ + { + "text": "Your", + "start": 88.58, + "end": 88.86, + "confidence": 0.549 + }, + { + "text": "words", + "start": 88.86, + "end": 89.12, + "confidence": 0.921 + }, + { + "text": "up", + "start": 89.12, + "end": 89.46, + "confidence": 0.965 + }, + { + "text": "on", + "start": 89.46, + "end": 89.6, + "confidence": 0.995 + }, + { + "text": "the", + 
"start": 89.6, + "end": 89.66, + "confidence": 0.991 + }, + { + "text": "wall", + "start": 89.66, + "end": 90.12, + "confidence": 0.88 + }, + { + "text": "as", + "start": 90.12, + "end": 90.34, + "confidence": 0.403 + }, + { + "text": "you're", + "start": 90.34, + "end": 90.8, + "confidence": 0.828 + }, + { + "text": "praying", + "start": 90.8, + "end": 90.84, + "confidence": 0.967 + }, + { + "text": "for", + "start": 90.84, + "end": 91.06, + "confidence": 0.97 + }, + { + "text": "my", + "start": 91.06, + "end": 91.24, + "confidence": 0.989 + }, + { + "text": "phone", + "start": 91.24, + "end": 91.58, + "confidence": 0.702 + } + ] + }, + { + "id": 6, + "seek": 8856, + "start": 91.78, + "end": 94.68, + "text": " And the laughter in the holes and the names that I've been called", + "tokens": [ + 400, + 264, + 13092, + 294, + 264, + 8118, + 293, + 264, + 5288, + 300, + 286, + 600, + 668, + 1219 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.885, + "words": [ + { + "text": "And", + "start": 91.78, + "end": 91.92, + "confidence": 0.844 + }, + { + "text": "the", + "start": 91.92, + "end": 92.0, + "confidence": 0.987 + }, + { + "text": "laughter", + "start": 92.0, + "end": 92.4, + "confidence": 0.994 + }, + { + "text": "in", + "start": 92.4, + "end": 92.72, + "confidence": 0.849 + }, + { + "text": "the", + "start": 92.72, + "end": 92.92, + "confidence": 0.997 + }, + { + "text": "holes", + "start": 92.92, + "end": 93.2, + "confidence": 0.609 + }, + { + "text": "and", + "start": 93.2, + "end": 93.44, + "confidence": 0.596 + }, + { + "text": "the", + "start": 93.44, + "end": 93.54, + "confidence": 0.991 + }, + { + "text": "names", + "start": 93.54, + "end": 93.82, + "confidence": 0.991 + }, + { + "text": "that", + "start": 93.82, + "end": 94.12, + "confidence": 0.957 + }, + { + "text": "I've", + "start": 94.12, + "end": 94.28, + "confidence": 0.915 + }, + { 
+ "text": "been", + "start": 94.28, + "end": 94.42, + "confidence": 0.986 + }, + { + "text": "called", + "start": 94.42, + "end": 94.68, + "confidence": 0.912 + } + ] + }, + { + "id": 7, + "seek": 8856, + "start": 95.0, + "end": 97.82, + "text": " I stack it in my mind and I'm waiting for the time", + "tokens": [ + 286, + 8630, + 309, + 294, + 452, + 1575, + 293, + 286, + 478, + 3806, + 337, + 264, + 565 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.96, + "words": [ + { + "text": "I", + "start": 95.0, + "end": 95.12, + "confidence": 0.987 + }, + { + "text": "stack", + "start": 95.12, + "end": 95.42, + "confidence": 0.825 + }, + { + "text": "it", + "start": 95.42, + "end": 95.7, + "confidence": 0.995 + }, + { + "text": "in", + "start": 95.7, + "end": 95.82, + "confidence": 0.996 + }, + { + "text": "my", + "start": 95.82, + "end": 96.02, + "confidence": 0.994 + }, + { + "text": "mind", + "start": 96.02, + "end": 96.4, + "confidence": 0.999 + }, + { + "text": "and", + "start": 96.4, + "end": 96.62, + "confidence": 0.762 + }, + { + "text": "I'm", + "start": 96.62, + "end": 97.02, + "confidence": 0.991 + }, + { + "text": "waiting", + "start": 97.02, + "end": 97.06, + "confidence": 0.988 + }, + { + "text": "for", + "start": 97.06, + "end": 97.32, + "confidence": 0.996 + }, + { + "text": "the", + "start": 97.32, + "end": 97.5, + "confidence": 0.996 + }, + { + "text": "time", + "start": 97.5, + "end": 97.82, + "confidence": 0.996 + } + ] + }, + { + "id": 8, + "seek": 8856, + "start": 97.98, + "end": 101.24, + "text": " When I show you what it's like to be worse but in the mind", + "tokens": [ + 1133, + 286, + 855, + 291, + 437, + 309, + 311, + 411, + 281, + 312, + 5324, + 457, + 294, + 264, + 1575 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + 
"confidence": 0.755, + "words": [ + { + "text": "When", + "start": 97.98, + "end": 98.16, + "confidence": 0.923 + }, + { + "text": "I", + "start": 98.16, + "end": 98.22, + "confidence": 0.993 + }, + { + "text": "show", + "start": 98.22, + "end": 98.48, + "confidence": 0.959 + }, + { + "text": "you", + "start": 98.48, + "end": 98.74, + "confidence": 0.994 + }, + { + "text": "what", + "start": 98.74, + "end": 98.96, + "confidence": 0.996 + }, + { + "text": "it's", + "start": 98.96, + "end": 99.38, + "confidence": 0.988 + }, + { + "text": "like", + "start": 99.38, + "end": 99.42, + "confidence": 0.998 + }, + { + "text": "to", + "start": 99.42, + "end": 99.68, + "confidence": 0.877 + }, + { + "text": "be", + "start": 99.68, + "end": 99.78, + "confidence": 0.997 + }, + { + "text": "worse", + "start": 99.78, + "end": 100.04, + "confidence": 0.444 + }, + { + "text": "but", + "start": 100.04, + "end": 100.36, + "confidence": 0.196 + }, + { + "text": "in", + "start": 100.36, + "end": 100.52, + "confidence": 0.862 + }, + { + "text": "the", + "start": 100.52, + "end": 100.8, + "confidence": 0.426 + }, + { + "text": "mind", + "start": 100.8, + "end": 101.24, + "confidence": 0.621 + } + ] + }, + { + "id": 9, + "seek": 8856, + "start": 101.32, + "end": 106.28, + "text": " Tell you you're the greatest", + "tokens": [ + 5115, + 291, + 291, + 434, + 264, + 6636 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.94, + "words": [ + { + "text": "Tell", + "start": 101.32, + "end": 102.08, + "confidence": 0.937 + }, + { + "text": "you", + "start": 102.08, + "end": 102.68, + "confidence": 0.993 + }, + { + "text": "you're", + "start": 102.68, + "end": 103.66, + "confidence": 0.873 + }, + { + "text": "the", + "start": 103.66, + "end": 104.14, + "confidence": 0.985 + }, + { + "text": "greatest", + "start": 104.14, + "end": 106.28, + "confidence": 0.986 + } + ] + }, + { + "id": 
10, + "seek": 8856, + "start": 107.18, + "end": 113.0, + "text": " But once you turn they hate us", + "tokens": [ + 583, + 1564, + 291, + 1261, + 436, + 4700, + 505 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.909, + "words": [ + { + "text": "But", + "start": 107.18, + "end": 107.52, + "confidence": 0.963 + }, + { + "text": "once", + "start": 107.52, + "end": 108.34, + "confidence": 0.79 + }, + { + "text": "you", + "start": 108.34, + "end": 108.9, + "confidence": 0.998 + }, + { + "text": "turn", + "start": 108.9, + "end": 109.84, + "confidence": 0.913 + }, + { + "text": "they", + "start": 109.84, + "end": 110.34, + "confidence": 0.754 + }, + { + "text": "hate", + "start": 110.34, + "end": 111.9, + "confidence": 0.985 + }, + { + "text": "us", + "start": 111.9, + "end": 113.0, + "confidence": 0.999 + } + ] + }, + { + "id": 11, + "seek": 11312, + "start": 113.14, + "end": 119.18, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.804, + "words": [ + { + "text": "Oh,", + "start": 113.14, + "end": 115.14, + "confidence": 0.317 + }, + { + "text": "the", + "start": 115.14, + "end": 115.34, + "confidence": 0.964 + }, + { + "text": "misery", + "start": 115.34, + "end": 116.08, + "confidence": 0.995 + }, + { + "text": "Everybody", + "start": 116.08, + "end": 117.48, + "confidence": 0.482 + }, + { + "text": "wants", + "start": 117.48, + "end": 117.86, + "confidence": 0.986 + }, + { + "text": "to", + "start": 117.86, + "end": 118.08, + "confidence": 0.995 + }, + { + "text": "be", + "start": 118.08, + "end": 118.26, + "confidence": 0.998 + }, + { + "text": "my", + "start": 118.26, + "end": 118.58, + 
"confidence": 0.993 + }, + { + "text": "enemy", + "start": 118.58, + "end": 119.18, + "confidence": 0.983 + } + ] + }, + { + "id": 12, + "seek": 11312, + "start": 119.94, + "end": 126.08, + "text": " Spare the sympathy Everybody wants to be my enemy", + "tokens": [ + 1738, + 543, + 264, + 33240, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.874, + "words": [ + { + "text": "Spare", + "start": 119.94, + "end": 121.26, + "confidence": 0.576 + }, + { + "text": "the", + "start": 121.26, + "end": 121.62, + "confidence": 0.985 + }, + { + "text": "sympathy", + "start": 121.62, + "end": 122.32, + "confidence": 0.994 + }, + { + "text": "Everybody", + "start": 122.32, + "end": 123.76, + "confidence": 0.969 + }, + { + "text": "wants", + "start": 123.76, + "end": 124.12, + "confidence": 0.994 + }, + { + "text": "to", + "start": 124.12, + "end": 124.6, + "confidence": 0.999 + }, + { + "text": "be", + "start": 124.6, + "end": 125.34, + "confidence": 0.999 + }, + { + "text": "my", + "start": 125.34, + "end": 125.58, + "confidence": 0.835 + }, + { + "text": "enemy", + "start": 125.58, + "end": 126.08, + "confidence": 0.998 + } + ] + }, + { + "id": 13, + "seek": 11312, + "start": 129.86, + "end": 132.32, + "text": " Look out for yourself My enemy", + "tokens": [ + 2053, + 484, + 337, + 1803, + 1222, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.794, + "words": [ + { + "text": "Look", + "start": 129.86, + "end": 130.08, + "confidence": 0.592 + }, + { + "text": "out", + "start": 130.08, + "end": 130.3, + "confidence": 0.892 + }, + { + "text": "for", + "start": 130.3, + "end": 130.44, + "confidence": 0.978 + }, + { + "text": "yourself", + "start": 130.44, + "end": 130.92, + "confidence": 0.988 + }, + 
{ + "text": "My", + "start": 130.92, + "end": 131.74, + "confidence": 0.496 + }, + { + "text": "enemy", + "start": 131.74, + "end": 132.32, + "confidence": 0.989 + } + ] + }, + { + "id": 14, + "seek": 11312, + "start": 133.7, + "end": 137.14, + "text": " Look out for yourself", + "tokens": [ + 2053, + 484, + 337, + 1803 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.97, + "words": [ + { + "text": "Look", + "start": 133.7, + "end": 134.62, + "confidence": 0.907 + }, + { + "text": "out", + "start": 134.62, + "end": 136.52, + "confidence": 0.978 + }, + { + "text": "for", + "start": 136.52, + "end": 136.66, + "confidence": 0.999 + }, + { + "text": "yourself", + "start": 136.66, + "end": 137.14, + "confidence": 0.999 + } + ] + }, + { + "id": 15, + "seek": 11312, + "start": 137.64, + "end": 140.16, + "text": " Look, okay I'm hoping that somebody pray for me", + "tokens": [ + 2053, + 11, + 1392, + 286, + 478, + 7159, + 300, + 2618, + 3690, + 337, + 385 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.786, + "words": [ + { + "text": "Look,", + "start": 137.64, + "end": 138.46, + "confidence": 0.727 + }, + { + "text": "okay", + "start": 138.46, + "end": 138.62, + "confidence": 0.727 + }, + { + "text": "I'm", + "start": 138.62, + "end": 138.88, + "confidence": 0.698 + }, + { + "text": "hoping", + "start": 138.88, + "end": 139.14, + "confidence": 0.501 + }, + { + "text": "that", + "start": 139.14, + "end": 139.34, + "confidence": 0.958 + }, + { + "text": "somebody", + "start": 139.34, + "end": 139.52, + "confidence": 0.981 + }, + { + "text": "pray", + "start": 139.52, + "end": 139.8, + "confidence": 0.821 + }, + { + "text": "for", + "start": 139.8, + "end": 140.04, + "confidence": 0.9 + }, + { + "text": "me", + "start": 140.04, + "end": 140.16, 
+ "confidence": 0.999 + } + ] + }, + { + "id": 16, + "seek": 14012, + "start": 140.18, + "end": 141.7, + "text": " I'm praying that somebody hold for me.", + "tokens": [ + 286, + 478, + 15611, + 300, + 2618, + 1797, + 337, + 385, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.795, + "words": [ + { + "text": "I'm", + "start": 140.18, + "end": 140.38, + "confidence": 0.819 + }, + { + "text": "praying", + "start": 140.38, + "end": 140.62, + "confidence": 0.694 + }, + { + "text": "that", + "start": 140.62, + "end": 140.82, + "confidence": 0.786 + }, + { + "text": "somebody", + "start": 140.82, + "end": 141.08, + "confidence": 0.985 + }, + { + "text": "hold", + "start": 141.08, + "end": 141.32, + "confidence": 0.447 + }, + { + "text": "for", + "start": 141.32, + "end": 141.52, + "confidence": 0.993 + }, + { + "text": "me.", + "start": 141.52, + "end": 141.7, + "confidence": 0.999 + } + ] + }, + { + "id": 17, + "seek": 14012, + "start": 141.72, + "end": 143.43, + "text": " I'm staying where nobody's supposed to be.", + "tokens": [ + 286, + 478, + 7939, + 689, + 5079, + 311, + 3442, + 281, + 312, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.853, + "words": [ + { + "text": "I'm", + "start": 141.72, + "end": 141.9, + "confidence": 0.976 + }, + { + "text": "staying", + "start": 141.9, + "end": 142.12, + "confidence": 0.708 + }, + { + "text": "where", + "start": 142.12, + "end": 142.3, + "confidence": 0.906 + }, + { + "text": "nobody's", + "start": 142.3, + "end": 142.88, + "confidence": 0.654 + }, + { + "text": "supposed", + "start": 142.88, + "end": 142.92, + "confidence": 0.924 + }, + { + "text": "to", + "start": 142.92, + "end": 143.12, + "confidence": 0.992 + }, + { + "text": "be.", + "start": 143.12, + "end": 143.43, + 
"confidence": 0.997 + } + ] + }, + { + "id": 18, + "seek": 14012, + "start": 143.43, + "end": 145.29, + "text": " I propose to be in a wreck of emotions.", + "tokens": [ + 286, + 17421, + 281, + 312, + 294, + 257, + 21478, + 295, + 8462, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.645, + "words": [ + { + "text": "I", + "start": 143.43, + "end": 143.47, + "confidence": 0.492 + }, + { + "text": "propose", + "start": 143.47, + "end": 143.66, + "confidence": 0.332 + }, + { + "text": "to", + "start": 143.66, + "end": 143.84, + "confidence": 0.78 + }, + { + "text": "be", + "start": 143.84, + "end": 144.04, + "confidence": 0.442 + }, + { + "text": "in", + "start": 144.04, + "end": 144.16, + "confidence": 0.81 + }, + { + "text": "a", + "start": 144.16, + "end": 144.24, + "confidence": 0.555 + }, + { + "text": "wreck", + "start": 144.24, + "end": 144.36, + "confidence": 0.81 + }, + { + "text": "of", + "start": 144.36, + "end": 144.54, + "confidence": 0.99 + }, + { + "text": "emotions.", + "start": 144.54, + "end": 145.29, + "confidence": 0.953 + } + ] + }, + { + "id": 19, + "seek": 14012, + "start": 145.29, + "end": 146.56, + "text": " Ready to go whenever you let me know.", + "tokens": [ + 9944, + 281, + 352, + 5699, + 291, + 718, + 385, + 458, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.834, + "words": [ + { + "text": "Ready", + "start": 145.29, + "end": 145.33, + "confidence": 0.951 + }, + { + "text": "to", + "start": 145.33, + "end": 145.5, + "confidence": 0.997 + }, + { + "text": "go", + "start": 145.5, + "end": 145.56, + "confidence": 0.997 + }, + { + "text": "whenever", + "start": 145.56, + "end": 145.76, + "confidence": 0.659 + }, + { + "text": "you", + "start": 145.76, + "end": 145.92, + "confidence": 0.403 + }, + { 
+ "text": "let", + "start": 145.92, + "end": 146.04, + "confidence": 0.952 + }, + { + "text": "me", + "start": 146.04, + "end": 146.16, + "confidence": 0.998 + }, + { + "text": "know.", + "start": 146.16, + "end": 146.56, + "confidence": 0.977 + } + ] + }, + { + "id": 20, + "seek": 14012, + "start": 146.56, + "end": 147.86, + "text": " The road is long, so put the pedal into the flow.", + "tokens": [ + 440, + 3060, + 307, + 938, + 11, + 370, + 829, + 264, + 19122, + 666, + 264, + 3095, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.959, + "words": [ + { + "text": "The", + "start": 146.56, + "end": 146.6, + "confidence": 0.99 + }, + { + "text": "road", + "start": 146.6, + "end": 146.66, + "confidence": 0.99 + }, + { + "text": "is", + "start": 146.66, + "end": 146.74, + "confidence": 0.987 + }, + { + "text": "long,", + "start": 146.74, + "end": 146.96, + "confidence": 0.961 + }, + { + "text": "so", + "start": 146.96, + "end": 147.02, + "confidence": 0.968 + }, + { + "text": "put", + "start": 147.02, + "end": 147.14, + "confidence": 0.954 + }, + { + "text": "the", + "start": 147.14, + "end": 147.28, + "confidence": 0.972 + }, + { + "text": "pedal", + "start": 147.28, + "end": 147.38, + "confidence": 0.993 + }, + { + "text": "into", + "start": 147.38, + "end": 147.58, + "confidence": 0.898 + }, + { + "text": "the", + "start": 147.58, + "end": 147.74, + "confidence": 0.992 + }, + { + "text": "flow.", + "start": 147.74, + "end": 147.86, + "confidence": 0.857 + } + ] + }, + { + "id": 21, + "seek": 14012, + "start": 147.94, + "end": 150.22, + "text": " The enemy on my trail, my energy unavailable.", + "tokens": [ + 440, + 5945, + 322, + 452, + 9924, + 11, + 452, + 2281, + 36541, + 32699, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + 
"confidence": 0.949, + "words": [ + { + "text": "The", + "start": 147.94, + "end": 148.06, + "confidence": 0.968 + }, + { + "text": "enemy", + "start": 148.06, + "end": 148.24, + "confidence": 0.727 + }, + { + "text": "on", + "start": 148.24, + "end": 148.44, + "confidence": 0.974 + }, + { + "text": "my", + "start": 148.44, + "end": 148.5, + "confidence": 0.974 + }, + { + "text": "trail,", + "start": 148.5, + "end": 148.72, + "confidence": 0.986 + }, + { + "text": "my", + "start": 148.72, + "end": 148.82, + "confidence": 0.99 + }, + { + "text": "energy", + "start": 148.82, + "end": 149.06, + "confidence": 0.996 + }, + { + "text": "unavailable.", + "start": 149.06, + "end": 150.22, + "confidence": 0.978 + } + ] + }, + { + "id": 22, + "seek": 14012, + "start": 150.22, + "end": 151.35, + "text": " I'ma tell them I said away, go away.", + "tokens": [ + 286, + 478, + 64, + 980, + 552, + 286, + 848, + 1314, + 11, + 352, + 1314, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.571, + "words": [ + { + "text": "I'ma", + "start": 150.22, + "end": 150.26, + "confidence": 0.767 + }, + { + "text": "tell", + "start": 150.26, + "end": 150.36, + "confidence": 0.989 + }, + { + "text": "them", + "start": 150.36, + "end": 150.5, + "confidence": 0.336 + }, + { + "text": "I", + "start": 150.5, + "end": 150.58, + "confidence": 0.358 + }, + { + "text": "said", + "start": 150.58, + "end": 150.74, + "confidence": 0.3 + }, + { + "text": "away,", + "start": 150.74, + "end": 151.14, + "confidence": 0.323 + }, + { + "text": "go", + "start": 151.14, + "end": 151.18, + "confidence": 0.911 + }, + { + "text": "away.", + "start": 151.18, + "end": 151.35, + "confidence": 0.781 + } + ] + }, + { + "id": 23, + "seek": 14012, + "start": 151.35, + "end": 152.78, + "text": " When I'm plotting, I'ma drive to the top.", + "tokens": [ + 1133, + 286, + 478, + 41178, + 11, + 286, + 478, + 
64, + 3332, + 281, + 264, + 1192, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.645, + "words": [ + { + "text": "When", + "start": 151.35, + "end": 151.54, + "confidence": 0.884 + }, + { + "text": "I'm", + "start": 151.54, + "end": 151.72, + "confidence": 0.594 + }, + { + "text": "plotting,", + "start": 151.72, + "end": 151.92, + "confidence": 0.367 + }, + { + "text": "I'ma", + "start": 151.92, + "end": 152.04, + "confidence": 0.488 + }, + { + "text": "drive", + "start": 152.04, + "end": 152.14, + "confidence": 0.621 + }, + { + "text": "to", + "start": 152.14, + "end": 152.32, + "confidence": 0.984 + }, + { + "text": "the", + "start": 152.32, + "end": 152.38, + "confidence": 0.997 + }, + { + "text": "top.", + "start": 152.38, + "end": 152.78, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 14012, + "start": 152.78, + "end": 153.76, + "text": " I've been out of shape, thinking out of the box.", + "tokens": [ + 286, + 600, + 668, + 484, + 295, + 3909, + 11, + 1953, + 484, + 295, + 264, + 2424, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.769, + "words": [ + { + "text": "I've", + "start": 152.78, + "end": 152.82, + "confidence": 0.721 + }, + { + "text": "been", + "start": 152.82, + "end": 152.86, + "confidence": 0.998 + }, + { + "text": "out", + "start": 152.86, + "end": 152.9, + "confidence": 0.905 + }, + { + "text": "of", + "start": 152.9, + "end": 152.96, + "confidence": 0.986 + }, + { + "text": "shape,", + "start": 152.96, + "end": 153.12, + "confidence": 0.998 + }, + { + "text": "thinking", + "start": 153.12, + "end": 153.3, + "confidence": 0.842 + }, + { + "text": "out", + "start": 153.3, + "end": 153.48, + "confidence": 0.347 + }, + { + "text": "of", + "start": 153.48, + "end": 153.54, + "confidence": 
0.507 + }, + { + "text": "the", + "start": 153.54, + "end": 153.58, + "confidence": 0.819 + }, + { + "text": "box.", + "start": 153.58, + "end": 153.76, + "confidence": 0.998 + } + ] + }, + { + "id": 25, + "seek": 14012, + "start": 153.76, + "end": 155.34, + "text": " I'm an astronaut, blasted off the planet.", + "tokens": [ + 286, + 478, + 364, + 18516, + 11, + 12035, + 292, + 766, + 264, + 5054, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.967, + "words": [ + { + "text": "I'm", + "start": 153.76, + "end": 153.9, + "confidence": 0.996 + }, + { + "text": "an", + "start": 153.9, + "end": 153.98, + "confidence": 0.996 + }, + { + "text": "astronaut,", + "start": 153.98, + "end": 154.68, + "confidence": 0.991 + }, + { + "text": "blasted", + "start": 154.68, + "end": 154.86, + "confidence": 0.895 + }, + { + "text": "off", + "start": 154.86, + "end": 155.0, + "confidence": 0.987 + }, + { + "text": "the", + "start": 155.0, + "end": 155.16, + "confidence": 0.959 + }, + { + "text": "planet.", + "start": 155.16, + "end": 155.34, + "confidence": 0.997 + } + ] + }, + { + "id": 26, + "seek": 14012, + "start": 155.36, + "end": 157.11, + "text": " Rock the cars, catastrophic, and it matters more", + "tokens": [ + 6922, + 264, + 5163, + 11, + 34915, + 11, + 293, + 309, + 7001, + 544 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.703, + "words": [ + { + "text": "Rock", + "start": 155.36, + "end": 155.56, + "confidence": 0.563 + }, + { + "text": "the", + "start": 155.56, + "end": 155.7, + "confidence": 0.504 + }, + { + "text": "cars,", + "start": 155.7, + "end": 156.36, + "confidence": 0.472 + }, + { + "text": "catastrophic,", + "start": 156.36, + "end": 156.4, + "confidence": 0.77 + }, + { + "text": "and", + "start": 156.4, + "end": 156.62, 
+ "confidence": 0.925 + }, + { + "text": "it", + "start": 156.62, + "end": 156.7, + "confidence": 0.73 + }, + { + "text": "matters", + "start": 156.7, + "end": 156.94, + "confidence": 0.874 + }, + { + "text": "more", + "start": 156.94, + "end": 157.11, + "confidence": 0.973 + } + ] + }, + { + "id": 27, + "seek": 14012, + "start": 157.11, + "end": 157.93, + "text": " because I had it now.", + "tokens": [ + 570, + 286, + 632, + 309, + 586, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.648, + "words": [ + { + "text": "because", + "start": 157.11, + "end": 157.38, + "confidence": 0.99 + }, + { + "text": "I", + "start": 157.38, + "end": 157.56, + "confidence": 0.528 + }, + { + "text": "had", + "start": 157.56, + "end": 157.68, + "confidence": 0.935 + }, + { + "text": "it", + "start": 157.68, + "end": 157.76, + "confidence": 0.984 + }, + { + "text": "now.", + "start": 157.76, + "end": 157.93, + "confidence": 0.237 + } + ] + }, + { + "id": 28, + "seek": 14012, + "start": 157.93, + "end": 160.1, + "text": " Had I thought about wreaking havoc on an opposition.", + "tokens": [ + 12298, + 286, + 1194, + 466, + 46674, + 2456, + 47367, + 322, + 364, + 13504, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.955, + "words": [ + { + "text": "Had", + "start": 157.93, + "end": 158.12, + "confidence": 0.852 + }, + { + "text": "I", + "start": 158.12, + "end": 158.2, + "confidence": 0.971 + }, + { + "text": "thought", + "start": 158.2, + "end": 158.4, + "confidence": 0.991 + }, + { + "text": "about", + "start": 158.4, + "end": 158.58, + "confidence": 0.995 + }, + { + "text": "wreaking", + "start": 158.58, + "end": 158.84, + "confidence": 0.996 + }, + { + "text": "havoc", + "start": 158.84, + "end": 159.08, + "confidence": 1.0 + }, + { + 
"text": "on", + "start": 159.08, + "end": 159.28, + "confidence": 0.861 + }, + { + "text": "an", + "start": 159.28, + "end": 159.44, + "confidence": 0.913 + }, + { + "text": "opposition.", + "start": 159.44, + "end": 160.1, + "confidence": 0.991 + } + ] + }, + { + "id": 29, + "seek": 14012, + "start": 160.1, + "end": 161.04, + "text": " Kind of shocking, they want it static.", + "tokens": [ + 9242, + 295, + 18776, + 11, + 436, + 528, + 309, + 13437, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.666, + "words": [ + { + "text": "Kind", + "start": 160.1, + "end": 160.14, + "confidence": 0.663 + }, + { + "text": "of", + "start": 160.14, + "end": 160.22, + "confidence": 0.995 + }, + { + "text": "shocking,", + "start": 160.22, + "end": 160.5, + "confidence": 0.656 + }, + { + "text": "they", + "start": 160.5, + "end": 160.58, + "confidence": 0.366 + }, + { + "text": "want", + "start": 160.58, + "end": 160.74, + "confidence": 0.573 + }, + { + "text": "it", + "start": 160.74, + "end": 160.84, + "confidence": 0.648 + }, + { + "text": "static.", + "start": 160.84, + "end": 161.04, + "confidence": 0.993 + } + ] + }, + { + "id": 30, + "seek": 14012, + "start": 161.06, + "end": 162.14, + "text": " With precision, I'm automatic.", + "tokens": [ + 2022, + 18356, + 11, + 286, + 478, + 12509, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.875, + "words": [ + { + "text": "With", + "start": 161.06, + "end": 161.24, + "confidence": 0.59 + }, + { + "text": "precision,", + "start": 161.24, + "end": 161.72, + "confidence": 0.904 + }, + { + "text": "I'm", + "start": 161.72, + "end": 161.78, + "confidence": 0.987 + }, + { + "text": "automatic.", + "start": 161.78, + "end": 162.14, + "confidence": 0.987 + } + ] + }, + { + "id": 31, + "seek": 
14012, + "start": 162.24, + "end": 163.36, + "text": " Quarterback, I ain't talking second.", + "tokens": [ + 43794, + 3207, + 11, + 286, + 7862, + 380, + 1417, + 1150, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.688, + "words": [ + { + "text": "Quarterback,", + "start": 162.24, + "end": 162.66, + "confidence": 0.513 + }, + { + "text": "I", + "start": 162.66, + "end": 162.78, + "confidence": 0.965 + }, + { + "text": "ain't", + "start": 162.78, + "end": 162.86, + "confidence": 0.99 + }, + { + "text": "talking", + "start": 162.86, + "end": 163.1, + "confidence": 0.892 + }, + { + "text": "second.", + "start": 163.1, + "end": 163.36, + "confidence": 0.328 + } + ] + }, + { + "id": 32, + "seek": 14012, + "start": 163.4, + "end": 164.17, + "text": " Pack it, pack it up.", + "tokens": [ + 18466, + 309, + 11, + 2844, + 309, + 493, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.973, + "words": [ + { + "text": "Pack", + "start": 163.4, + "end": 163.6, + "confidence": 0.968 + }, + { + "text": "it,", + "start": 163.6, + "end": 163.68, + "confidence": 0.937 + }, + { + "text": "pack", + "start": 163.68, + "end": 163.84, + "confidence": 0.986 + }, + { + "text": "it", + "start": 163.84, + "end": 163.98, + "confidence": 0.999 + }, + { + "text": "up.", + "start": 163.98, + "end": 164.17, + "confidence": 0.979 + } + ] + }, + { + "id": 33, + "seek": 14012, + "start": 164.17, + "end": 165.1, + "text": " I don't panic, better, better.", + "tokens": [ + 286, + 500, + 380, + 14783, + 11, + 1101, + 11, + 1101, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.76, + "words": [ + { + "text": "I", + "start": 164.17, + "end": 
164.21, + "confidence": 0.984 + }, + { + "text": "don't", + "start": 164.21, + "end": 164.34, + "confidence": 0.913 + }, + { + "text": "panic,", + "start": 164.34, + "end": 164.8, + "confidence": 0.999 + }, + { + "text": "better,", + "start": 164.8, + "end": 164.84, + "confidence": 0.423 + }, + { + "text": "better.", + "start": 164.84, + "end": 165.1, + "confidence": 0.556 + } + ] + }, + { + "id": 34, + "seek": 14012, + "start": 165.16, + "end": 165.76, + "text": " Up who the baddest.", + "tokens": [ + 5858, + 567, + 264, + 1578, + 23748, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.752, + "words": [ + { + "text": "Up", + "start": 165.16, + "end": 165.24, + "confidence": 0.518 + }, + { + "text": "who", + "start": 165.24, + "end": 165.38, + "confidence": 0.751 + }, + { + "text": "the", + "start": 165.38, + "end": 165.52, + "confidence": 0.926 + }, + { + "text": "baddest.", + "start": 165.52, + "end": 165.76, + "confidence": 0.817 + } + ] + }, + { + "id": 35, + "seek": 16572, + "start": 165.76, + "end": 166.86, + "text": " it don't matter cause we is your enemy.", + "tokens": [ + 50364, + 309, + 500, + 380, + 1871, + 3082, + 321, + 307, + 428, + 5945, + 13, + 50414 + ], + "temperature": 0.0, + "avg_logprob": -0.910880969120906, + "compression_ratio": 0.8297872340425532, + "no_speech_prob": 0.6945543885231018, + "confidence": 0.448, + "words": [ + { + "text": "it", + "start": 165.76, + "end": 165.86, + "confidence": 0.124 + }, + { + "text": "don't", + "start": 165.86, + "end": 166.18, + "confidence": 0.825 + }, + { + "text": "matter", + "start": 166.18, + "end": 166.22, + "confidence": 0.996 + }, + { + "text": "cause", + "start": 166.22, + "end": 166.4, + "confidence": 0.287 + }, + { + "text": "we", + "start": 166.4, + "end": 166.5, + "confidence": 0.889 + }, + { + "text": "is", + "start": 166.5, + "end": 166.68, + "confidence": 0.231 + }, + { 
+ "text": "your", + "start": 166.68, + "end": 166.74, + "confidence": 0.562 + }, + { + "text": "enemy.", + "start": 166.74, + "end": 166.86, + "confidence": 0.258 + } + ] + }, + { + "id": 36, + "seek": 19572, + "start": 196.18, + "end": 198.2, + "text": " I swear I'll never be insane", + "tokens": [ + 286, + 11902, + 286, + 603, + 1128, + 312, + 10838 + ], + "temperature": 0.0, + "avg_logprob": -0.5786522030830383, + "compression_ratio": 1.0, + "no_speech_prob": 0.756009578704834, + "confidence": 0.75, + "words": [ + { + "text": "I", + "start": 196.18, + "end": 196.4, + "confidence": 0.623 + }, + { + "text": "swear", + "start": 196.4, + "end": 196.66, + "confidence": 0.959 + }, + { + "text": "I'll", + "start": 196.66, + "end": 197.2, + "confidence": 0.643 + }, + { + "text": "never", + "start": 197.2, + "end": 197.5, + "confidence": 0.997 + }, + { + "text": "be", + "start": 197.5, + "end": 197.84, + "confidence": 0.993 + }, + { + "text": "insane", + "start": 197.84, + "end": 198.2, + "confidence": 0.546 + } + ] + }, + { + "id": 37, + "seek": 19772, + "start": 198.38, + "end": 199.72, + "text": " You got to be yourself", + "tokens": [ + 50364, + 509, + 658, + 281, + 312, + 1803, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.7925397753715515, + "compression_ratio": 0.7333333333333333, + "no_speech_prob": 0.7818466424942017, + "confidence": 0.406, + "words": [ + { + "text": "You", + "start": 198.38, + "end": 198.54, + "confidence": 0.093 + }, + { + "text": "got", + "start": 198.54, + "end": 198.8, + "confidence": 0.199 + }, + { + "text": "to", + "start": 198.8, + "end": 198.98, + "confidence": 0.803 + }, + { + "text": "be", + "start": 198.98, + "end": 199.08, + "confidence": 0.826 + }, + { + "text": "yourself", + "start": 199.08, + "end": 199.72, + "confidence": 0.903 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/random.nocond_apollo11.mp3.words.json 
b/tests/expected/corner_cases.cpu/random.nocond_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..411722411d7434ca27a340d8cf4629331e376df9 --- /dev/null +++ b/tests/expected/corner_cases.cpu/random.nocond_apollo11.mp3.words.json @@ -0,0 +1,1192 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. All right. Okay, we like to say that they make it one or two on the helmet. We're going to have a B1 and you can put the other one on the mic helmet with those GVA blizzard frames. We were going to hack me on the ground with a cover. I tried it already. Okay, fine. We weren't sure of that. Just a suggestion. We thought we'd get you to check it out. I'm not sure if you've already turned that. So I guess we're going to come up with this. Let us know. Okay, no problem. Okay, no problem. No problem. No one at the end of the line.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.68, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.709779328937772, + "compression_ratio": 1.3631578947368421, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.542, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.155 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.54, + "end": 1.8, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.8, + "end": 1.98, + "confidence": 0.52 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.824 + }, + { + "text": "a", + "start": 2.16, + "end": 2.38, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.38, + "end": 3.1, + 
"confidence": 0.968 + }, + { + "text": "for", + "start": 3.1, + "end": 3.52, + "confidence": 0.947 + }, + { + "text": "you", + "start": 3.52, + "end": 3.9, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.9, + "end": 4.28, + "confidence": 0.909 + }, + { + "text": "your", + "start": 4.28, + "end": 4.32, + "confidence": 0.971 + }, + { + "text": "Soyuz-VA", + "start": 4.32, + "end": 5.38, + "confidence": 0.26 + }, + { + "text": "GLEME", + "start": 5.38, + "end": 6.04, + "confidence": 0.478 + }, + { + "text": "GVA.", + "start": 6.04, + "end": 6.68, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 7.5, + "end": 11.0, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.709779328937772, + "compression_ratio": 1.3631578947368421, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.456, + "words": [ + { + "text": "All", + "start": 7.5, + "end": 7.7, + "confidence": 0.212 + }, + { + "text": "right.", + "start": 7.7, + "end": 11.0, + "confidence": 0.982 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 11.5, + "end": 17.19, + "text": " Okay, we like to say that they make it one or two on the helmet.", + "tokens": [ + 1033, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 309, + 472, + 420, + 732, + 322, + 264, + 15922, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.709779328937772, + "compression_ratio": 1.3631578947368421, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.428, + "words": [ + { + "text": "Okay,", + "start": 11.5, + "end": 12.82, + "confidence": 0.577 + }, + { + "text": "we", + "start": 12.82, + "end": 13.12, + "confidence": 0.541 + }, + { + "text": "like", + "start": 13.12, + "end": 13.44, + "confidence": 0.493 + }, + { + "text": "to", + "start": 13.44, + "end": 13.68, + "confidence": 0.307 + }, + { + "text": "say", + "start": 13.68, + "end": 14.96, + "confidence": 0.172 + }, + { + "text": "that", + "start": 14.96, + "end": 15.56, + 
"confidence": 0.203 + }, + { + "text": "they", + "start": 15.56, + "end": 15.6, + "confidence": 0.472 + }, + { + "text": "make", + "start": 15.6, + "end": 15.76, + "confidence": 0.378 + }, + { + "text": "it", + "start": 15.76, + "end": 15.96, + "confidence": 0.259 + }, + { + "text": "one", + "start": 15.96, + "end": 16.1, + "confidence": 0.481 + }, + { + "text": "or", + "start": 16.1, + "end": 16.34, + "confidence": 0.221 + }, + { + "text": "two", + "start": 16.34, + "end": 16.46, + "confidence": 0.938 + }, + { + "text": "on", + "start": 16.46, + "end": 16.7, + "confidence": 0.638 + }, + { + "text": "the", + "start": 16.7, + "end": 16.96, + "confidence": 0.885 + }, + { + "text": "helmet.", + "start": 16.96, + "end": 17.19, + "confidence": 0.695 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 17.19, + "end": 24.74, + "text": " We're going to have a B1 and you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 492, + 434, + 516, + 281, + 362, + 257, + 363, + 16, + 293, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.709779328937772, + "compression_ratio": 1.3631578947368421, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.515, + "words": [ + { + "text": "We're", + "start": 17.19, + "end": 17.86, + "confidence": 0.63 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.62 + }, + { + "text": "to", + "start": 18.06, + "end": 18.24, + "confidence": 0.993 + }, + { + "text": "have", + "start": 18.24, + "end": 18.3, + "confidence": 0.959 + }, + { + "text": "a", + "start": 18.3, + "end": 18.42, + "confidence": 0.32 + }, + { + "text": "B1", + "start": 18.42, + "end": 19.3, + "confidence": 0.799 + }, + { + "text": "and", + "start": 19.3, + "end": 20.18, + "confidence": 0.498 + }, + { + "text": "you", + "start": 20.18, + "end": 20.36, + "confidence": 0.87 + }, + { + 
"text": "can", + "start": 20.36, + "end": 20.54, + "confidence": 0.672 + }, + { + "text": "put", + "start": 20.54, + "end": 20.72, + "confidence": 0.967 + }, + { + "text": "the", + "start": 20.72, + "end": 20.88, + "confidence": 0.974 + }, + { + "text": "other", + "start": 20.88, + "end": 21.06, + "confidence": 0.995 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.973 + }, + { + "text": "on", + "start": 21.26, + "end": 21.68, + "confidence": 0.973 + }, + { + "text": "the", + "start": 21.68, + "end": 21.96, + "confidence": 0.585 + }, + { + "text": "mic", + "start": 21.96, + "end": 22.6, + "confidence": 0.48 + }, + { + "text": "helmet", + "start": 22.6, + "end": 22.94, + "confidence": 0.949 + }, + { + "text": "with", + "start": 22.94, + "end": 23.2, + "confidence": 0.444 + }, + { + "text": "those", + "start": 23.2, + "end": 23.5, + "confidence": 0.351 + }, + { + "text": "GVA", + "start": 23.5, + "end": 23.86, + "confidence": 0.169 + }, + { + "text": "blizzard", + "start": 23.86, + "end": 24.32, + "confidence": 0.094 + }, + { + "text": "frames.", + "start": 24.32, + "end": 24.74, + "confidence": 0.245 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 56.0, + "end": 61.1, + "text": " We were going to hack me on the ground with a cover.", + "tokens": [ + 492, + 645, + 516, + 281, + 10339, + 385, + 322, + 264, + 2727, + 365, + 257, + 2060, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.342, + "words": [ + { + "text": "We", + "start": 56.0, + "end": 56.68, + "confidence": 0.071 + }, + { + "text": "were", + "start": 56.68, + "end": 56.98, + "confidence": 0.177 + }, + { + "text": "going", + "start": 56.98, + "end": 57.2, + "confidence": 0.148 + }, + { + "text": "to", + "start": 57.2, + "end": 57.46, + "confidence": 0.932 + }, + { + "text": "hack", + "start": 57.46, + "end": 57.5, + "confidence": 0.261 + }, + { + 
"text": "me", + "start": 57.5, + "end": 57.64, + "confidence": 0.544 + }, + { + "text": "on", + "start": 57.64, + "end": 59.2, + "confidence": 0.433 + }, + { + "text": "the", + "start": 59.2, + "end": 59.5, + "confidence": 0.309 + }, + { + "text": "ground", + "start": 59.5, + "end": 59.78, + "confidence": 0.356 + }, + { + "text": "with", + "start": 59.78, + "end": 60.62, + "confidence": 0.471 + }, + { + "text": "a", + "start": 60.62, + "end": 60.78, + "confidence": 0.538 + }, + { + "text": "cover.", + "start": 60.78, + "end": 61.1, + "confidence": 0.855 + } + ] + }, + { + "id": 5, + "seek": 5500, + "start": 61.12, + "end": 61.88, + "text": " I tried it already.", + "tokens": [ + 286, + 3031, + 309, + 1217, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.682, + "words": [ + { + "text": "I", + "start": 61.12, + "end": 61.26, + "confidence": 0.407 + }, + { + "text": "tried", + "start": 61.26, + "end": 61.52, + "confidence": 0.889 + }, + { + "text": "it", + "start": 61.52, + "end": 61.7, + "confidence": 0.746 + }, + { + "text": "already.", + "start": 61.7, + "end": 61.88, + "confidence": 0.803 + } + ] + }, + { + "id": 6, + "seek": 5500, + "start": 62.5, + "end": 63.12, + "text": " Okay, fine.", + "tokens": [ + 1033, + 11, + 2489, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.683, + "words": [ + { + "text": "Okay,", + "start": 62.5, + "end": 63.04, + "confidence": 0.521 + }, + { + "text": "fine.", + "start": 63.04, + "end": 63.12, + "confidence": 0.895 + } + ] + }, + { + "id": 7, + "seek": 5500, + "start": 63.12, + "end": 64.12, + "text": " We weren't sure of that.", + "tokens": [ + 492, + 4999, + 380, + 988, + 295, + 300, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 
1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.895, + "words": [ + { + "text": "We", + "start": 63.12, + "end": 63.34, + "confidence": 0.959 + }, + { + "text": "weren't", + "start": 63.34, + "end": 63.62, + "confidence": 0.988 + }, + { + "text": "sure", + "start": 63.62, + "end": 63.82, + "confidence": 0.903 + }, + { + "text": "of", + "start": 63.82, + "end": 64.08, + "confidence": 0.613 + }, + { + "text": "that.", + "start": 64.08, + "end": 64.12, + "confidence": 0.989 + } + ] + }, + { + "id": 8, + "seek": 5500, + "start": 64.46, + "end": 65.2, + "text": " Just a suggestion.", + "tokens": [ + 1449, + 257, + 16541, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.755, + "words": [ + { + "text": "Just", + "start": 64.46, + "end": 64.66, + "confidence": 0.545 + }, + { + "text": "a", + "start": 64.66, + "end": 64.76, + "confidence": 0.791 + }, + { + "text": "suggestion.", + "start": 64.76, + "end": 65.2, + "confidence": 0.997 + } + ] + }, + { + "id": 9, + "seek": 5500, + "start": 65.34, + "end": 67.92, + "text": " We thought we'd get you to check it out.", + "tokens": [ + 492, + 1194, + 321, + 1116, + 483, + 291, + 281, + 1520, + 309, + 484, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.692, + "words": [ + { + "text": "We", + "start": 65.34, + "end": 65.44, + "confidence": 0.873 + }, + { + "text": "thought", + "start": 65.44, + "end": 65.62, + "confidence": 0.968 + }, + { + "text": "we'd", + "start": 65.62, + "end": 66.76, + "confidence": 0.686 + }, + { + "text": "get", + "start": 66.76, + "end": 67.06, + "confidence": 0.093 + }, + { + "text": "you", + "start": 67.06, + "end": 67.14, + "confidence": 0.823 + }, + { + "text": "to", + "start": 67.14, + "end": 67.32, + "confidence": 0.987 + }, + { 
+ "text": "check", + "start": 67.32, + "end": 67.46, + "confidence": 0.845 + }, + { + "text": "it", + "start": 67.46, + "end": 67.68, + "confidence": 0.991 + }, + { + "text": "out.", + "start": 67.68, + "end": 67.92, + "confidence": 0.997 + } + ] + }, + { + "id": 10, + "seek": 5500, + "start": 68.28, + "end": 69.34, + "text": " I'm not sure if you've already turned that.", + "tokens": [ + 286, + 478, + 406, + 988, + 498, + 291, + 600, + 1217, + 3574, + 300, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.444, + "words": [ + { + "text": "I'm", + "start": 68.28, + "end": 68.5, + "confidence": 0.314 + }, + { + "text": "not", + "start": 68.5, + "end": 68.56, + "confidence": 0.319 + }, + { + "text": "sure", + "start": 68.56, + "end": 68.74, + "confidence": 0.82 + }, + { + "text": "if", + "start": 68.74, + "end": 68.78, + "confidence": 0.334 + }, + { + "text": "you've", + "start": 68.78, + "end": 68.84, + "confidence": 0.389 + }, + { + "text": "already", + "start": 68.84, + "end": 68.94, + "confidence": 0.588 + }, + { + "text": "turned", + "start": 68.94, + "end": 69.12, + "confidence": 0.738 + }, + { + "text": "that.", + "start": 69.12, + "end": 69.34, + "confidence": 0.524 + } + ] + }, + { + "id": 11, + "seek": 5500, + "start": 70.42, + "end": 71.98, + "text": " So I guess we're going to come up with this.", + "tokens": [ + 407, + 286, + 2041, + 321, + 434, + 516, + 281, + 808, + 493, + 365, + 341, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.709, + "words": [ + { + "text": "So", + "start": 70.42, + "end": 70.5, + "confidence": 0.533 + }, + { + "text": "I", + "start": 70.5, + "end": 70.54, + "confidence": 0.454 + }, + { + "text": "guess", + "start": 70.54, + "end": 70.82, + "confidence": 0.991 + }, + { + "text": "we're", + 
"start": 70.82, + "end": 71.12, + "confidence": 0.654 + }, + { + "text": "going", + "start": 71.12, + "end": 71.28, + "confidence": 0.815 + }, + { + "text": "to", + "start": 71.28, + "end": 71.36, + "confidence": 0.992 + }, + { + "text": "come", + "start": 71.36, + "end": 71.48, + "confidence": 0.97 + }, + { + "text": "up", + "start": 71.48, + "end": 71.68, + "confidence": 0.958 + }, + { + "text": "with", + "start": 71.68, + "end": 71.92, + "confidence": 0.915 + }, + { + "text": "this.", + "start": 71.92, + "end": 71.98, + "confidence": 0.323 + } + ] + }, + { + "id": 12, + "seek": 5500, + "start": 73.5, + "end": 73.62, + "text": " Let us know.", + "tokens": [ + 961, + 505, + 458, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.847, + "words": [ + { + "text": "Let", + "start": 73.5, + "end": 73.54, + "confidence": 0.615 + }, + { + "text": "us", + "start": 73.54, + "end": 73.58, + "confidence": 0.99 + }, + { + "text": "know.", + "start": 73.58, + "end": 73.62, + "confidence": 0.998 + } + ] + }, + { + "id": 13, + "seek": 5500, + "start": 74.22, + "end": 75.22, + "text": " Okay, no problem.", + "tokens": [ + 1033, + 11, + 572, + 1154, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.851, + "words": [ + { + "text": "Okay,", + "start": 74.22, + "end": 74.7, + "confidence": 0.687 + }, + { + "text": "no", + "start": 74.7, + "end": 74.86, + "confidence": 0.917 + }, + { + "text": "problem.", + "start": 74.86, + "end": 75.22, + "confidence": 0.976 + } + ] + }, + { + "id": 14, + "seek": 5500, + "start": 75.28, + "end": 76.46, + "text": " Okay, no problem.", + "tokens": [ + 1033, + 11, + 572, + 1154, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 
0.7498895525932312, + "confidence": 0.623, + "words": [ + { + "text": "Okay,", + "start": 75.28, + "end": 75.98, + "confidence": 0.258 + }, + { + "text": "no", + "start": 75.98, + "end": 76.2, + "confidence": 0.948 + }, + { + "text": "problem.", + "start": 76.2, + "end": 76.46, + "confidence": 0.993 + } + ] + }, + { + "id": 15, + "seek": 7600, + "start": 76.46, + "end": 76.62, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.7044810771942138, + "compression_ratio": 0.9344262295081968, + "no_speech_prob": 0.35927486419677734, + "confidence": 0.653, + "words": [ + { + "text": "No", + "start": 76.46, + "end": 76.5, + "confidence": 0.463 + }, + { + "text": "problem.", + "start": 76.5, + "end": 76.62, + "confidence": 0.922 + } + ] + }, + { + "id": 16, + "seek": 7700, + "start": 77.02, + "end": 78.32, + "text": " No one at the end of the line.", + "tokens": [ + 50364, + 883, + 472, + 412, + 264, + 917, + 295, + 264, + 1622, + 13, + 50464 + ], + "temperature": 0.1, + "avg_logprob": -0.676593542098999, + "compression_ratio": 0.8823529411764706, + "no_speech_prob": 0.2589211165904999, + "confidence": 0.558, + "words": [ + { + "text": "No", + "start": 77.02, + "end": 77.22, + "confidence": 0.139 + }, + { + "text": "one", + "start": 77.22, + "end": 77.36, + "confidence": 0.335 + }, + { + "text": "at", + "start": 77.36, + "end": 77.44, + "confidence": 0.483 + }, + { + "text": "the", + "start": 77.44, + "end": 77.48, + "confidence": 0.996 + }, + { + "text": "end", + "start": 77.48, + "end": 77.92, + "confidence": 0.995 + }, + { + "text": "of", + "start": 77.92, + "end": 78.1, + "confidence": 0.968 + }, + { + "text": "the", + "start": 78.1, + "end": 78.2, + "confidence": 0.536 + }, + { + "text": "line.", + "start": 78.2, + "end": 78.32, + "confidence": 0.816 + } + ] + } + ], + "language": "English" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/random_apollo11.mp3.words.json 
b/tests/expected/corner_cases.cpu/random_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..23ccd701318a10a1d5462a5b3b9a55f8ce872217 --- /dev/null +++ b/tests/expected/corner_cases.cpu/random_apollo11.mp3.words.json @@ -0,0 +1,3815 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-EA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.94, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-EA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.2, + "avg_logprob": -0.6937426777629109, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.549, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.155 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.54, + "end": 1.8, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.8, + "end": 1.98, + "confidence": 0.52 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.824 + }, + { + "text": "a", + "start": 2.16, + "end": 2.38, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.38, + "end": 3.1, + "confidence": 0.968 + }, + { + "text": "for", + "start": 3.1, + "end": 3.52, + "confidence": 0.947 + }, + { + "text": "you", + "start": 
3.52, + "end": 3.9, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.9, + "end": 4.28, + "confidence": 0.909 + }, + { + "text": "your", + "start": 4.28, + "end": 4.32, + "confidence": 0.971 + }, + { + "text": "Soyuz-EA", + "start": 4.32, + "end": 5.42, + "confidence": 0.321 + }, + { + "text": "GLEME", + "start": 5.42, + "end": 6.04, + "confidence": 0.561 + }, + { + "text": "GVA.", + "start": 6.04, + "end": 6.94, + "confidence": 0.336 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.82, + "end": 19.86, + "text": " Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13 + ], + "temperature": 0.2, + "avg_logprob": -0.6937426777629109, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.469, + "words": [ + { + "text": "Alright,", + "start": 10.82, + "end": 12.5, + "confidence": 0.295 + }, + { + "text": "okay,", + "start": 12.5, + "end": 12.94, + "confidence": 0.513 + }, + { + "text": "we", + "start": 12.94, + "end": 13.12, + "confidence": 0.614 + }, + { + "text": "like", + "start": 13.12, + "end": 13.44, + "confidence": 0.496 + }, + { + "text": "to", + "start": 13.44, + "end": 13.68, + "confidence": 0.26 + }, + { + "text": "say", + "start": 13.68, + "end": 14.98, + "confidence": 0.135 + }, + { + "text": "that", + "start": 14.98, + "end": 15.56, + "confidence": 0.203 + }, + { + "text": "they", + "start": 15.56, + "end": 15.6, + "confidence": 0.425 + }, + { + "text": "make", + "start": 15.6, + "end": 15.76, + "confidence": 0.408 + }, + { + "text": "the", + "start": 15.76, + "end": 15.92, + "confidence": 0.255 + }, + { + "text": "one", + "start": 15.92, + "end": 16.1, + "confidence": 0.609 + }, + { + "text": "that's", + "start": 16.1, + "end": 16.34, + 
"confidence": 0.447 + }, + { + "text": "on", + "start": 16.34, + "end": 16.62, + "confidence": 0.598 + }, + { + "text": "the", + "start": 16.62, + "end": 16.96, + "confidence": 0.837 + }, + { + "text": "helmet", + "start": 16.96, + "end": 17.36, + "confidence": 0.836 + }, + { + "text": "we're", + "start": 17.36, + "end": 17.86, + "confidence": 0.301 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.606 + }, + { + "text": "to", + "start": 18.06, + "end": 18.22, + "confidence": 0.818 + }, + { + "text": "have", + "start": 18.22, + "end": 18.26, + "confidence": 0.846 + }, + { + "text": "in", + "start": 18.26, + "end": 18.42, + "confidence": 0.718 + }, + { + "text": "B1.", + "start": 18.42, + "end": 19.86, + "confidence": 0.798 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 20.1, + "end": 24.76, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.2, + "avg_logprob": -0.6937426777629109, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.52, + "words": [ + { + "text": "And", + "start": 20.1, + "end": 20.26, + "confidence": 0.62 + }, + { + "text": "you", + "start": 20.26, + "end": 20.38, + "confidence": 0.948 + }, + { + "text": "can", + "start": 20.38, + "end": 20.54, + "confidence": 0.731 + }, + { + "text": "put", + "start": 20.54, + "end": 20.72, + "confidence": 0.98 + }, + { + "text": "the", + "start": 20.72, + "end": 20.88, + "confidence": 0.989 + }, + { + "text": "other", + "start": 20.88, + "end": 21.06, + "confidence": 0.991 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.977 + }, + { + "text": "on", + "start": 21.26, + "end": 21.68, + "confidence": 0.989 + }, + { + "text": "the", + "start": 21.68, + "end": 21.96, + "confidence": 0.554 + 
}, + { + "text": "mic", + "start": 21.96, + "end": 22.58, + "confidence": 0.442 + }, + { + "text": "helmet", + "start": 22.58, + "end": 22.94, + "confidence": 0.842 + }, + { + "text": "with", + "start": 22.94, + "end": 23.2, + "confidence": 0.433 + }, + { + "text": "those", + "start": 23.2, + "end": 23.5, + "confidence": 0.471 + }, + { + "text": "GVA", + "start": 23.5, + "end": 23.88, + "confidence": 0.414 + }, + { + "text": "blizzard", + "start": 23.88, + "end": 24.32, + "confidence": 0.104 + }, + { + "text": "frames.", + "start": 24.32, + "end": 24.76, + "confidence": 0.25 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 25.02, + "end": 55.0, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.2, + "avg_logprob": -0.10773486667209202, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.0012825782177969813, + "confidence": 0.936, + "words": [ + { + "text": "Alright,", + "start": 25.02, + "end": 31.98, + "confidence": 0.245 + }, + { + "text": "got", + "start": 31.98, + "end": 32.02, + "confidence": 0.356 + }, + { + "text": "them,", + "start": 32.02, + "end": 32.58, + "confidence": 0.323 + }, + { + "text": "got", + "start": 32.58, + "end": 33.08, + "confidence": 0.519 + }, + { + "text": "them,", + "start": 33.08, + "end": 33.78, + "confidence": 0.941 + }, + { + "text": "got", + "start": 33.78, + "end": 33.82, + "confidence": 0.507 + }, + { + "text": "them,", + "start": 33.82, + "end": 34.12, + "confidence": 0.926 + }, + { + "text": "got", + "start": 34.12, + "end": 34.16, + "confidence": 0.475 + }, + { + "text": "them,", + "start": 34.16, + "end": 34.92, + "confidence": 0.897 + }, + { + "text": "got", + "start": 34.92, + "end": 34.96, + "confidence": 0.597 + }, + { + "text": "them,", + "start": 34.96, + 
"end": 35.88, + "confidence": 0.914 + }, + { + "text": "got", + "start": 35.88, + "end": 35.92, + "confidence": 0.662 + }, + { + "text": "them,", + "start": 35.92, + "end": 35.96, + "confidence": 0.942 + }, + { + "text": "got", + "start": 35.96, + "end": 36.0, + "confidence": 0.738 + }, + { + "text": "them,", + "start": 36.0, + "end": 36.04, + "confidence": 0.963 + }, + { + "text": "got", + "start": 36.04, + "end": 36.08, + "confidence": 0.819 + }, + { + "text": "them,", + "start": 36.08, + "end": 36.12, + "confidence": 0.974 + }, + { + "text": "got", + "start": 36.12, + "end": 36.16, + "confidence": 0.843 + }, + { + "text": "them,", + "start": 36.16, + "end": 36.2, + "confidence": 0.981 + }, + { + "text": "got", + "start": 36.2, + "end": 36.24, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 36.24, + "end": 36.28, + "confidence": 0.985 + }, + { + "text": "got", + "start": 36.28, + "end": 36.32, + "confidence": 0.918 + }, + { + "text": "them,", + "start": 36.32, + "end": 36.36, + "confidence": 0.987 + }, + { + "text": "got", + "start": 36.36, + "end": 36.4, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 36.4, + "end": 36.44, + "confidence": 0.988 + }, + { + "text": "got", + "start": 36.44, + "end": 36.48, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 36.48, + "end": 36.52, + "confidence": 0.987 + }, + { + "text": "got", + "start": 36.52, + "end": 36.56, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 36.56, + "end": 36.6, + "confidence": 0.99 + }, + { + "text": "got", + "start": 36.6, + "end": 36.64, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 36.64, + "end": 36.68, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.68, + "end": 36.72, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.72, + "end": 36.76, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.76, + "end": 36.8, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.8, + "end": 36.84, + 
"confidence": 0.991 + }, + { + "text": "got", + "start": 36.84, + "end": 36.88, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 36.88, + "end": 36.92, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.92, + "end": 36.96, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 36.96, + "end": 37.0, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.0, + "end": 37.04, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.04, + "end": 37.08, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.08, + "end": 37.12, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 37.12, + "end": 37.16, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.16, + "end": 37.2, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 37.2, + "end": 37.24, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.24, + "end": 37.28, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 37.28, + "end": 37.32, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.32, + "end": 37.36, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 37.36, + "end": 37.4, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.4, + "end": 37.44, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 37.44, + "end": 37.48, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.48, + "end": 37.52, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 37.52, + "end": 37.56, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.56, + "end": 37.6, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 37.6, + "end": 37.64, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.64, + "end": 37.68, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 37.68, + "end": 37.72, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.72, + "end": 37.76, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 37.76, + "end": 37.8, + "confidence": 
0.993 + }, + { + "text": "got", + "start": 37.8, + "end": 37.84, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 37.84, + "end": 37.88, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.88, + "end": 37.92, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 37.92, + "end": 37.96, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.96, + "end": 38.0, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 38.0, + "end": 38.04, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.04, + "end": 38.08, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 38.08, + "end": 38.12, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.12, + "end": 38.16, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 38.16, + "end": 38.2, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.2, + "end": 38.24, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 38.24, + "end": 38.28, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.28, + "end": 38.32, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 38.32, + "end": 38.36, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.36, + "end": 38.4, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 38.4, + "end": 38.44, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.44, + "end": 38.48, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 38.48, + "end": 38.52, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.52, + "end": 38.56, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 38.56, + "end": 38.6, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.6, + "end": 38.64, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 38.64, + "end": 38.68, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.68, + "end": 38.72, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 38.72, + "end": 38.76, + "confidence": 0.994 + }, + { + 
"text": "got", + "start": 38.76, + "end": 38.8, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 38.8, + "end": 38.84, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.84, + "end": 38.88, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 38.88, + "end": 38.92, + "confidence": 0.995 + }, + { + "text": "got", + "start": 38.92, + "end": 38.96, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 38.96, + "end": 39.0, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.0, + "end": 39.04, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.04, + "end": 39.08, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.08, + "end": 39.12, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.12, + "end": 39.16, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.16, + "end": 39.2, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.2, + "end": 39.24, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.24, + "end": 39.28, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.28, + "end": 39.32, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.32, + "end": 39.36, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.36, + "end": 39.4, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.4, + "end": 39.44, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.44, + "end": 39.48, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.48, + "end": 39.52, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.52, + "end": 39.56, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.56, + "end": 39.6, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.6, + "end": 39.64, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.64, + "end": 39.68, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.68, + "end": 39.72, + "confidence": 0.995 + }, + { + "text": "got", + 
"start": 39.72, + "end": 39.76, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.76, + "end": 39.8, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.8, + "end": 39.84, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.84, + "end": 39.88, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.88, + "end": 39.92, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.92, + "end": 39.96, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.96, + "end": 40.0, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 40.0, + "end": 40.04, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.04, + "end": 40.08, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 40.08, + "end": 40.12, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.12, + "end": 40.16, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.16, + "end": 40.2, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.2, + "end": 40.24, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.24, + "end": 40.28, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.28, + "end": 40.32, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.32, + "end": 40.36, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.36, + "end": 40.4, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.4, + "end": 40.44, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.44, + "end": 40.48, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.48, + "end": 40.52, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.52, + "end": 40.56, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.56, + "end": 40.6, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.6, + "end": 40.64, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.64, + "end": 40.68, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.68, + 
"end": 40.72, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.72, + "end": 40.76, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.76, + "end": 40.8, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.8, + "end": 40.84, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.84, + "end": 40.88, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.88, + "end": 40.92, + "confidence": 0.996 + }, + { + "text": "got", + "start": 40.92, + "end": 40.96, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.96, + "end": 41.0, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.0, + "end": 41.04, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.04, + "end": 41.08, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.08, + "end": 41.12, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.12, + "end": 41.16, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.16, + "end": 41.2, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.2, + "end": 42.94, + "confidence": 0.996 + }, + { + "text": "got", + "start": 42.94, + "end": 46.82, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.82, + "end": 47.8, + "confidence": 0.996 + }, + { + "text": "got", + "start": 47.8, + "end": 48.58, + "confidence": 0.995 + }, + { + "text": "them", + "start": 48.58, + "end": 55.0, + "confidence": 0.997 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 72.06, + "end": 85.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.2, + "avg_logprob": -0.053046889369263245, + "compression_ratio": 29.52, + "no_speech_prob": 0.24410122632980347, + "confidence": 0.948, + "words": [ + { + "text": "got", + "start": 72.06, + "end": 72.1, + 
"confidence": 0.214 + }, + { + "text": "them,", + "start": 72.1, + "end": 72.14, + "confidence": 0.95 + }, + { + "text": "got", + "start": 72.14, + "end": 72.18, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 72.18, + "end": 72.22, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.22, + "end": 72.26, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 72.26, + "end": 72.3, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.3, + "end": 72.34, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 72.34, + "end": 72.38, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.38, + "end": 72.42, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 72.42, + "end": 72.46, + "confidence": 0.993 + }, + { + "text": "got", + "start": 72.46, + "end": 72.5, + "confidence": 0.912 + }, + { + "text": "them,", + "start": 72.5, + "end": 72.54, + "confidence": 0.988 + }, + { + "text": "got", + "start": 72.54, + "end": 72.58, + "confidence": 0.873 + }, + { + "text": "them,", + "start": 72.58, + "end": 72.62, + "confidence": 0.982 + }, + { + "text": "got", + "start": 72.62, + "end": 72.66, + "confidence": 0.863 + }, + { + "text": "them,", + "start": 72.66, + "end": 72.7, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.7, + "end": 72.74, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 72.74, + "end": 72.78, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.78, + "end": 72.82, + "confidence": 0.852 + }, + { + "text": "them,", + "start": 72.82, + "end": 72.86, + "confidence": 0.925 + }, + { + "text": "got", + "start": 72.86, + "end": 72.9, + "confidence": 0.56 + }, + { + "text": "them,", + "start": 72.9, + "end": 72.94, + "confidence": 0.958 + }, + { + "text": "got", + "start": 72.94, + "end": 72.98, + "confidence": 0.81 + }, + { + "text": "them,", + "start": 72.98, + "end": 73.02, + "confidence": 0.976 + }, + { + "text": "got", + "start": 73.02, + "end": 73.06, + "confidence": 0.843 
+ }, + { + "text": "them,", + "start": 73.06, + "end": 73.1, + "confidence": 0.977 + }, + { + "text": "got", + "start": 73.1, + "end": 73.14, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 73.14, + "end": 73.18, + "confidence": 0.977 + }, + { + "text": "got", + "start": 73.18, + "end": 73.22, + "confidence": 0.836 + }, + { + "text": "them,", + "start": 73.22, + "end": 73.26, + "confidence": 0.979 + }, + { + "text": "got", + "start": 73.26, + "end": 73.3, + "confidence": 0.853 + }, + { + "text": "them,", + "start": 73.3, + "end": 73.34, + "confidence": 0.983 + }, + { + "text": "got", + "start": 73.34, + "end": 73.38, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 73.38, + "end": 73.42, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.42, + "end": 73.46, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 73.46, + "end": 73.5, + "confidence": 0.985 + }, + { + "text": "got", + "start": 73.5, + "end": 73.54, + "confidence": 0.885 + }, + { + "text": "them,", + "start": 73.54, + "end": 73.58, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.58, + "end": 73.62, + "confidence": 0.877 + }, + { + "text": "them,", + "start": 73.62, + "end": 73.66, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.66, + "end": 73.7, + "confidence": 0.873 + }, + { + "text": "them,", + "start": 73.7, + "end": 73.74, + "confidence": 0.987 + }, + { + "text": "got", + "start": 73.74, + "end": 73.78, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 73.78, + "end": 73.82, + "confidence": 0.987 + }, + { + "text": "got", + "start": 73.82, + "end": 73.86, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 73.86, + "end": 73.9, + "confidence": 0.988 + }, + { + "text": "got", + "start": 73.9, + "end": 73.94, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 73.94, + "end": 73.98, + "confidence": 0.989 + }, + { + "text": "got", + "start": 73.98, + "end": 74.02, + "confidence": 0.889 + }, + { + "text": 
"them,", + "start": 74.02, + "end": 74.06, + "confidence": 0.989 + }, + { + "text": "got", + "start": 74.06, + "end": 74.1, + "confidence": 0.895 + }, + { + "text": "them,", + "start": 74.1, + "end": 74.14, + "confidence": 0.99 + }, + { + "text": "got", + "start": 74.14, + "end": 74.18, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 74.18, + "end": 74.22, + "confidence": 0.99 + }, + { + "text": "got", + "start": 74.22, + "end": 74.26, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 74.26, + "end": 74.3, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.3, + "end": 74.34, + "confidence": 0.911 + }, + { + "text": "them,", + "start": 74.34, + "end": 74.38, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.38, + "end": 74.42, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 74.42, + "end": 74.46, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.46, + "end": 74.5, + "confidence": 0.919 + }, + { + "text": "them,", + "start": 74.5, + "end": 74.54, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.54, + "end": 74.58, + "confidence": 0.922 + }, + { + "text": "them,", + "start": 74.58, + "end": 74.62, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.62, + "end": 74.66, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 74.66, + "end": 74.7, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.7, + "end": 74.74, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 74.74, + "end": 74.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.78, + "end": 74.82, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 74.82, + "end": 74.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.86, + "end": 74.9, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 74.9, + "end": 74.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.94, + "end": 74.98, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 
74.98, + "end": 75.02, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.02, + "end": 75.06, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 75.06, + "end": 75.1, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.1, + "end": 75.14, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 75.14, + "end": 75.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.18, + "end": 75.22, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 75.22, + "end": 75.26, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.26, + "end": 75.3, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 75.3, + "end": 75.34, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.34, + "end": 75.38, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 75.38, + "end": 75.42, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.42, + "end": 75.46, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 75.46, + "end": 75.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.5, + "end": 75.54, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 75.54, + "end": 75.58, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.58, + "end": 75.62, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 75.62, + "end": 75.66, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.66, + "end": 75.7, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 75.7, + "end": 75.74, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.74, + "end": 75.78, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.78, + "end": 75.82, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.82, + "end": 75.86, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.86, + "end": 75.9, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.9, + "end": 75.94, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 75.94, + "end": 
75.98, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.98, + "end": 76.02, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 76.02, + "end": 76.06, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.06, + "end": 76.1, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 76.1, + "end": 76.14, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.14, + "end": 76.18, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 76.18, + "end": 76.22, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.22, + "end": 76.26, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.26, + "end": 76.3, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.3, + "end": 76.34, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.34, + "end": 76.38, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.38, + "end": 76.42, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.42, + "end": 76.46, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.46, + "end": 76.5, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.5, + "end": 76.54, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.54, + "end": 76.58, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.58, + "end": 76.62, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.62, + "end": 76.66, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.66, + "end": 76.7, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.7, + "end": 76.74, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 76.74, + "end": 76.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.78, + "end": 76.82, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.82, + "end": 76.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.86, + "end": 76.9, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.9, + "end": 76.94, + 
"confidence": 0.992 + }, + { + "text": "got", + "start": 76.94, + "end": 76.98, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 76.98, + "end": 77.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.02, + "end": 77.06, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.06, + "end": 77.1, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.1, + "end": 77.14, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.14, + "end": 77.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.18, + "end": 77.22, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.22, + "end": 77.26, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.26, + "end": 77.3, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.3, + "end": 77.34, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.34, + "end": 77.38, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.38, + "end": 77.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.42, + "end": 77.46, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.46, + "end": 77.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.5, + "end": 77.54, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.54, + "end": 77.58, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.58, + "end": 77.62, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.62, + "end": 77.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.66, + "end": 77.7, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.7, + "end": 77.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.74, + "end": 77.78, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 77.78, + "end": 77.82, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.82, + "end": 77.86, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 77.86, + "end": 79.56, + "confidence": 
0.994 + }, + { + "text": "got", + "start": 79.56, + "end": 79.6, + "confidence": 0.981 + }, + { + "text": "them", + "start": 79.6, + "end": 85.0, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.02, + "end": 115.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 
11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.2, + "avg_logprob": -0.04965524716227578, + "compression_ratio": 29.52, + "no_speech_prob": 0.6971923112869263, + "confidence": 0.948, + "words": [ + { + "text": "got", + "start": 85.02, + "end": 85.46, + "confidence": 0.438 + }, + { + "text": "them,", + "start": 85.46, + "end": 86.26, + "confidence": 0.943 + }, + { + "text": "got", + "start": 86.26, + "end": 86.78, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 86.78, + "end": 87.18, + "confidence": 0.992 + }, + { + "text": "got", + "start": 87.18, + "end": 87.8, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 87.8, + "end": 87.84, + "confidence": 0.993 + }, + { + "text": "got", + "start": 87.84, + "end": 88.6, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 88.6, + "end": 88.64, + "confidence": 0.991 + }, + { + "text": "got", + "start": 88.64, + "end": 88.9, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 88.9, + "end": 88.94, + "confidence": 0.988 + }, + { + "text": "got", + "start": 88.94, + "end": 88.98, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 88.98, + "end": 89.02, + "confidence": 0.987 + }, + { + "text": "got", + "start": 89.02, + "end": 89.06, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 89.06, + "end": 89.1, + "confidence": 0.985 + }, + { + "text": "got", + "start": 89.1, + "end": 89.14, + "confidence": 0.899 + }, + { + "text": "them,", + "start": 89.14, + "end": 89.18, + "confidence": 0.985 + }, + { + 
"text": "got", + "start": 89.18, + "end": 89.22, + "confidence": 0.907 + }, + { + "text": "them,", + "start": 89.22, + "end": 89.26, + "confidence": 0.982 + }, + { + "text": "got", + "start": 89.26, + "end": 89.3, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 89.3, + "end": 89.34, + "confidence": 0.971 + }, + { + "text": "got", + "start": 89.34, + "end": 89.38, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 89.38, + "end": 89.42, + "confidence": 0.959 + }, + { + "text": "got", + "start": 89.42, + "end": 89.46, + "confidence": 0.845 + }, + { + "text": "them,", + "start": 89.46, + "end": 89.5, + "confidence": 0.96 + }, + { + "text": "got", + "start": 89.5, + "end": 89.54, + "confidence": 0.861 + }, + { + "text": "them,", + "start": 89.54, + "end": 89.58, + "confidence": 0.963 + }, + { + "text": "got", + "start": 89.58, + "end": 89.62, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 89.62, + "end": 89.66, + "confidence": 0.965 + }, + { + "text": "got", + "start": 89.66, + "end": 89.7, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 89.7, + "end": 89.74, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.74, + "end": 89.78, + "confidence": 0.887 + }, + { + "text": "them,", + "start": 89.78, + "end": 89.82, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.82, + "end": 89.86, + "confidence": 0.875 + }, + { + "text": "them,", + "start": 89.86, + "end": 89.9, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.9, + "end": 89.94, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 89.94, + "end": 89.98, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.98, + "end": 90.02, + "confidence": 0.868 + }, + { + "text": "them,", + "start": 90.02, + "end": 90.06, + "confidence": 0.966 + }, + { + "text": "got", + "start": 90.06, + "end": 90.1, + "confidence": 0.865 + }, + { + "text": "them,", + "start": 90.1, + "end": 90.14, + "confidence": 0.964 + }, + { + "text": "got", + 
"start": 90.14, + "end": 90.18, + "confidence": 0.865 + }, + { + "text": "them,", + "start": 90.18, + "end": 90.22, + "confidence": 0.965 + }, + { + "text": "got", + "start": 90.22, + "end": 90.26, + "confidence": 0.86 + }, + { + "text": "them,", + "start": 90.26, + "end": 90.3, + "confidence": 0.965 + }, + { + "text": "got", + "start": 90.3, + "end": 90.34, + "confidence": 0.867 + }, + { + "text": "them,", + "start": 90.34, + "end": 90.38, + "confidence": 0.966 + }, + { + "text": "got", + "start": 90.38, + "end": 90.42, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 90.42, + "end": 90.46, + "confidence": 0.967 + }, + { + "text": "got", + "start": 90.46, + "end": 90.5, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 90.5, + "end": 90.54, + "confidence": 0.968 + }, + { + "text": "got", + "start": 90.54, + "end": 90.58, + "confidence": 0.87 + }, + { + "text": "them,", + "start": 90.58, + "end": 90.62, + "confidence": 0.969 + }, + { + "text": "got", + "start": 90.62, + "end": 90.66, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 90.66, + "end": 90.7, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.7, + "end": 90.74, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 90.74, + "end": 90.78, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.78, + "end": 90.82, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 90.82, + "end": 90.86, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.86, + "end": 90.9, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 90.9, + "end": 90.94, + "confidence": 0.973 + }, + { + "text": "got", + "start": 90.94, + "end": 90.98, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 90.98, + "end": 91.02, + "confidence": 0.973 + }, + { + "text": "got", + "start": 91.02, + "end": 91.06, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 91.06, + "end": 91.1, + "confidence": 0.973 + }, + { + "text": "got", + "start": 91.1, + 
"end": 91.14, + "confidence": 0.893 + }, + { + "text": "them,", + "start": 91.14, + "end": 91.18, + "confidence": 0.975 + }, + { + "text": "got", + "start": 91.18, + "end": 91.22, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 91.22, + "end": 91.26, + "confidence": 0.975 + }, + { + "text": "got", + "start": 91.26, + "end": 91.3, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 91.3, + "end": 91.34, + "confidence": 0.976 + }, + { + "text": "got", + "start": 91.34, + "end": 91.38, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 91.38, + "end": 91.42, + "confidence": 0.977 + }, + { + "text": "got", + "start": 91.42, + "end": 91.46, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 91.46, + "end": 91.5, + "confidence": 0.978 + }, + { + "text": "got", + "start": 91.5, + "end": 91.54, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 91.54, + "end": 91.58, + "confidence": 0.979 + }, + { + "text": "got", + "start": 91.58, + "end": 91.62, + "confidence": 0.92 + }, + { + "text": "them,", + "start": 91.62, + "end": 91.66, + "confidence": 0.98 + }, + { + "text": "got", + "start": 91.66, + "end": 91.7, + "confidence": 0.922 + }, + { + "text": "them,", + "start": 91.7, + "end": 91.74, + "confidence": 0.981 + }, + { + "text": "got", + "start": 91.74, + "end": 91.78, + "confidence": 0.925 + }, + { + "text": "them,", + "start": 91.78, + "end": 91.82, + "confidence": 0.982 + }, + { + "text": "got", + "start": 91.82, + "end": 91.86, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 91.86, + "end": 91.9, + "confidence": 0.983 + }, + { + "text": "got", + "start": 91.9, + "end": 91.94, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 91.94, + "end": 91.98, + "confidence": 0.984 + }, + { + "text": "got", + "start": 91.98, + "end": 92.02, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 92.02, + "end": 92.06, + "confidence": 0.985 + }, + { + "text": "got", + "start": 92.06, + "end": 92.1, + 
"confidence": 0.941 + }, + { + "text": "them,", + "start": 92.1, + "end": 92.14, + "confidence": 0.986 + }, + { + "text": "got", + "start": 92.14, + "end": 92.18, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 92.18, + "end": 92.22, + "confidence": 0.986 + }, + { + "text": "got", + "start": 92.22, + "end": 92.26, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 92.26, + "end": 92.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.3, + "end": 92.34, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 92.34, + "end": 92.38, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.38, + "end": 92.42, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 92.42, + "end": 92.46, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.46, + "end": 92.5, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 92.5, + "end": 92.54, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.54, + "end": 92.58, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 92.58, + "end": 92.62, + "confidence": 0.99 + }, + { + "text": "got", + "start": 92.62, + "end": 92.66, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 92.66, + "end": 92.7, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.7, + "end": 92.74, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 92.74, + "end": 92.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.78, + "end": 92.82, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 92.82, + "end": 92.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.86, + "end": 92.9, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 92.9, + "end": 92.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.94, + "end": 92.98, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 92.98, + "end": 93.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.02, + "end": 93.06, + "confidence": 0.97 
+ }, + { + "text": "them,", + "start": 93.06, + "end": 93.1, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.1, + "end": 93.14, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 93.14, + "end": 93.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.18, + "end": 93.22, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 93.22, + "end": 93.26, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.26, + "end": 93.3, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 93.3, + "end": 93.34, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.34, + "end": 93.38, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 93.38, + "end": 93.42, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.42, + "end": 93.46, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 93.46, + "end": 93.5, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.5, + "end": 93.54, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 93.54, + "end": 93.58, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.58, + "end": 93.62, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 93.62, + "end": 93.66, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.66, + "end": 93.7, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 93.7, + "end": 93.74, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.74, + "end": 93.78, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 93.78, + "end": 93.82, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.82, + "end": 93.86, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 93.86, + "end": 93.9, + "confidence": 0.996 + }, + { + "text": "got", + "start": 93.9, + "end": 93.94, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 93.94, + "end": 93.98, + "confidence": 0.996 + }, + { + "text": "got", + "start": 93.98, + "end": 94.02, + "confidence": 0.984 + }, + { + "text": 
"them,", + "start": 94.02, + "end": 94.06, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.06, + "end": 94.1, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 94.1, + "end": 94.14, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.14, + "end": 94.18, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 94.18, + "end": 94.22, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.22, + "end": 94.26, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 94.26, + "end": 94.3, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.3, + "end": 94.34, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 94.34, + "end": 94.38, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.38, + "end": 94.42, + "confidence": 0.987 + }, + { + "text": "them", + "start": 94.42, + "end": 115.0, + "confidence": 0.996 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/stucked_lm_apollo11.mp3.words.json b/tests/expected/corner_cases.cpu/stucked_lm_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..527def1f7142f05644333ecb9e358b7e067d0468 --- /dev/null +++ b/tests/expected/corner_cases.cpu/stucked_lm_apollo11.mp3.words.json @@ -0,0 +1,3814 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. 
Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.94, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.542, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.155 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.54, + "end": 1.8, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.8, + "end": 1.98, + "confidence": 0.52 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.824 + }, + { + "text": "a", + "start": 2.16, + "end": 2.38, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.38, + "end": 3.1, + "confidence": 0.968 + }, + { + "text": "for", + "start": 3.1, + "end": 3.52, + "confidence": 0.947 + }, + { + "text": "you", + "start": 3.52, + "end": 3.9, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.9, + "end": 4.28, + "confidence": 0.909 + }, + { + "text": "your", + "start": 4.28, + "end": 4.32, + "confidence": 0.971 + }, + { + "text": "Soyuz-VA", + "start": 4.32, + "end": 5.38, + "confidence": 0.26 + }, + { + "text": "GLEME", + "start": 5.38, + "end": 6.04, + "confidence": 0.478 + }, + { + "text": "GVA.", + "start": 6.04, + "end": 6.94, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.82, + "end": 19.24, + "text": " Alright, okay, we 
like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.468, + "words": [ + { + "text": "Alright,", + "start": 10.82, + "end": 12.5, + "confidence": 0.309 + }, + { + "text": "okay,", + "start": 12.5, + "end": 12.94, + "confidence": 0.507 + }, + { + "text": "we", + "start": 12.94, + "end": 13.12, + "confidence": 0.61 + }, + { + "text": "like", + "start": 13.12, + "end": 13.48, + "confidence": 0.503 + }, + { + "text": "to", + "start": 13.48, + "end": 13.68, + "confidence": 0.264 + }, + { + "text": "say", + "start": 13.68, + "end": 14.98, + "confidence": 0.138 + }, + { + "text": "that", + "start": 14.98, + "end": 15.56, + "confidence": 0.2 + }, + { + "text": "they", + "start": 15.56, + "end": 15.6, + "confidence": 0.418 + }, + { + "text": "make", + "start": 15.6, + "end": 15.76, + "confidence": 0.398 + }, + { + "text": "the", + "start": 15.76, + "end": 15.92, + "confidence": 0.253 + }, + { + "text": "one", + "start": 15.92, + "end": 16.1, + "confidence": 0.608 + }, + { + "text": "that's", + "start": 16.1, + "end": 16.34, + "confidence": 0.443 + }, + { + "text": "on", + "start": 16.34, + "end": 16.62, + "confidence": 0.595 + }, + { + "text": "the", + "start": 16.62, + "end": 16.96, + "confidence": 0.871 + }, + { + "text": "helmet", + "start": 16.96, + "end": 17.36, + "confidence": 0.853 + }, + { + "text": "we're", + "start": 17.36, + "end": 17.86, + "confidence": 0.299 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.6 + }, + { + "text": "to", + "start": 18.06, + "end": 18.22, + "confidence": 0.818 + }, + { + "text": "have", + "start": 18.22, + 
"end": 18.26, + "confidence": 0.84 + }, + { + "text": "in", + "start": 18.26, + "end": 18.42, + "confidence": 0.717 + }, + { + "text": "B1.", + "start": 18.42, + "end": 19.24, + "confidence": 0.764 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 20.1, + "end": 24.76, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.485, + "words": [ + { + "text": "And", + "start": 20.1, + "end": 20.24, + "confidence": 0.67 + }, + { + "text": "you", + "start": 20.24, + "end": 20.38, + "confidence": 0.947 + }, + { + "text": "can", + "start": 20.38, + "end": 20.54, + "confidence": 0.729 + }, + { + "text": "put", + "start": 20.54, + "end": 20.72, + "confidence": 0.98 + }, + { + "text": "the", + "start": 20.72, + "end": 20.88, + "confidence": 0.989 + }, + { + "text": "other", + "start": 20.88, + "end": 21.08, + "confidence": 0.991 + }, + { + "text": "one", + "start": 21.08, + "end": 21.26, + "confidence": 0.978 + }, + { + "text": "on", + "start": 21.26, + "end": 21.68, + "confidence": 0.989 + }, + { + "text": "the", + "start": 21.68, + "end": 21.96, + "confidence": 0.52 + }, + { + "text": "mic", + "start": 21.96, + "end": 22.58, + "confidence": 0.413 + }, + { + "text": "helmet", + "start": 22.58, + "end": 22.94, + "confidence": 0.882 + }, + { + "text": "with", + "start": 22.94, + "end": 23.2, + "confidence": 0.425 + }, + { + "text": "those", + "start": 23.2, + "end": 23.5, + "confidence": 0.465 + }, + { + "text": "GVA", + "start": 23.5, + "end": 23.88, + "confidence": 0.216 + }, + { + "text": "blizzard", + "start": 23.88, + "end": 24.32, + "confidence": 0.107 + }, + { + "text": "frames.", + "start": 24.32, + "end": 24.76, + 
"confidence": 0.254 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 25.02, + "end": 55.0, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.1079042222764757, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.00111382023897022, + "confidence": 0.936, + "words": [ + { + "text": "Alright,", + "start": 25.02, + "end": 31.98, + "confidence": 0.213 + }, + { + "text": "got", + "start": 31.98, + "end": 32.02, + "confidence": 0.333 + }, + { + "text": "them,", + "start": 32.02, + "end": 32.58, + "confidence": 0.334 + }, + { + "text": "got", + "start": 32.58, + "end": 33.08, + "confidence": 0.526 + }, + { + "text": "them,", + "start": 33.08, + "end": 33.78, + "confidence": 0.942 + }, + { + "text": "got", + "start": 33.78, + "end": 33.82, + "confidence": 0.539 + }, + { + "text": "them,", + "start": 33.82, + "end": 34.18, + "confidence": 0.924 + }, + { + "text": "got", + "start": 34.18, + "end": 34.22, + "confidence": 0.48 + }, + { + "text": "them,", + "start": 34.22, + "end": 35.14, + "confidence": 0.907 + }, + { + "text": "got", + "start": 35.14, + "end": 35.18, + "confidence": 0.604 + }, + { + "text": "them,", + "start": 35.18, + "end": 35.22, + "confidence": 0.922 + }, + { + "text": "got", + "start": 35.22, + "end": 35.5, + "confidence": 0.661 + }, + { + "text": "them,", + "start": 35.5, + "end": 35.9, + "confidence": 0.948 + }, + { + "text": "got", + "start": 35.9, + "end": 35.94, + "confidence": 0.743 + }, + { + "text": "them,", + "start": 35.94, + "end": 35.98, + "confidence": 0.968 + }, + { + "text": "got", + "start": 35.98, + "end": 36.02, + "confidence": 0.825 + }, + { + "text": "them,", + "start": 36.02, + "end": 36.06, + "confidence": 0.975 + }, + { + "text": "got", + "start": 
36.06, + "end": 36.1, + "confidence": 0.839 + }, + { + "text": "them,", + "start": 36.1, + "end": 36.14, + "confidence": 0.981 + }, + { + "text": "got", + "start": 36.14, + "end": 36.18, + "confidence": 0.891 + }, + { + "text": "them,", + "start": 36.18, + "end": 36.22, + "confidence": 0.985 + }, + { + "text": "got", + "start": 36.22, + "end": 36.26, + "confidence": 0.919 + }, + { + "text": "them,", + "start": 36.26, + "end": 36.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 36.3, + "end": 36.34, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 36.34, + "end": 36.38, + "confidence": 0.988 + }, + { + "text": "got", + "start": 36.38, + "end": 36.42, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 36.42, + "end": 36.46, + "confidence": 0.987 + }, + { + "text": "got", + "start": 36.46, + "end": 36.5, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.5, + "end": 36.54, + "confidence": 0.989 + }, + { + "text": "got", + "start": 36.54, + "end": 36.58, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 36.58, + "end": 36.62, + "confidence": 0.99 + }, + { + "text": "got", + "start": 36.62, + "end": 36.66, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 36.66, + "end": 36.7, + "confidence": 0.99 + }, + { + "text": "got", + "start": 36.7, + "end": 36.74, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.74, + "end": 36.78, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.78, + "end": 36.82, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 36.82, + "end": 36.86, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.86, + "end": 36.9, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 36.9, + "end": 36.94, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.94, + "end": 36.98, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 36.98, + "end": 37.02, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.02, + "end": 
37.06, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 37.06, + "end": 37.1, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.1, + "end": 37.14, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 37.14, + "end": 37.18, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.18, + "end": 37.22, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 37.22, + "end": 37.26, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.26, + "end": 37.3, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 37.3, + "end": 37.34, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.34, + "end": 37.38, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 37.38, + "end": 37.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.42, + "end": 37.46, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 37.46, + "end": 37.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.5, + "end": 37.54, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 37.54, + "end": 37.58, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.58, + "end": 37.62, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 37.62, + "end": 37.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.66, + "end": 37.7, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 37.7, + "end": 37.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.74, + "end": 37.78, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 37.78, + "end": 37.82, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.82, + "end": 37.86, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 37.86, + "end": 37.9, + "confidence": 0.994 + }, + { + "text": "got", + "start": 37.9, + "end": 37.94, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 37.94, + "end": 37.98, + "confidence": 0.994 + }, + { + "text": "got", + "start": 37.98, + "end": 38.02, + 
"confidence": 0.968 + }, + { + "text": "them,", + "start": 38.02, + "end": 38.06, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.06, + "end": 38.1, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 38.1, + "end": 38.14, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.14, + "end": 38.18, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 38.18, + "end": 38.22, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.22, + "end": 38.26, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 38.26, + "end": 38.3, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.3, + "end": 38.34, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 38.34, + "end": 38.38, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.38, + "end": 38.42, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 38.42, + "end": 38.46, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.46, + "end": 38.5, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 38.5, + "end": 38.54, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.54, + "end": 38.58, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 38.58, + "end": 38.62, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.62, + "end": 38.66, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 38.66, + "end": 38.7, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.7, + "end": 38.74, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 38.74, + "end": 38.78, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.78, + "end": 38.82, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 38.82, + "end": 38.86, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.86, + "end": 38.9, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 38.9, + "end": 38.94, + "confidence": 0.995 + }, + { + "text": "got", + "start": 38.94, + "end": 38.98, + "confidence": 0.984 
+ }, + { + "text": "them,", + "start": 38.98, + "end": 39.02, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.02, + "end": 39.06, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.06, + "end": 39.1, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.1, + "end": 39.14, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.14, + "end": 39.18, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.18, + "end": 39.22, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.22, + "end": 39.26, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.26, + "end": 39.3, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.3, + "end": 39.34, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.34, + "end": 39.38, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.38, + "end": 39.42, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.42, + "end": 39.46, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.46, + "end": 39.5, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.5, + "end": 39.54, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.54, + "end": 39.58, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.58, + "end": 39.62, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.62, + "end": 39.66, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.66, + "end": 39.7, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.7, + "end": 39.74, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.74, + "end": 39.78, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.78, + "end": 39.82, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.82, + "end": 39.86, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.86, + "end": 39.9, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.9, + "end": 39.94, + "confidence": 0.992 + }, + { + "text": 
"them,", + "start": 39.94, + "end": 39.98, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.98, + "end": 40.02, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.02, + "end": 40.06, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.06, + "end": 40.1, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.1, + "end": 40.14, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.14, + "end": 40.18, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.18, + "end": 40.22, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.22, + "end": 40.26, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.26, + "end": 40.3, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.3, + "end": 40.34, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.34, + "end": 40.38, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.38, + "end": 40.42, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.42, + "end": 40.46, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.46, + "end": 40.5, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.5, + "end": 40.54, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.54, + "end": 40.58, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.58, + "end": 40.62, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.62, + "end": 40.66, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.66, + "end": 40.7, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.7, + "end": 40.74, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.74, + "end": 40.78, + "confidence": 0.996 + }, + { + "text": "got", + "start": 40.78, + "end": 40.82, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.82, + "end": 40.86, + "confidence": 0.996 + }, + { + "text": "got", + "start": 40.86, + "end": 40.9, + "confidence": 0.995 + }, + { + "text": "them,", + 
"start": 40.9, + "end": 40.94, + "confidence": 0.996 + }, + { + "text": "got", + "start": 40.94, + "end": 40.98, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.98, + "end": 41.02, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.02, + "end": 41.06, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.06, + "end": 42.94, + "confidence": 0.996 + }, + { + "text": "got", + "start": 42.94, + "end": 46.82, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.82, + "end": 47.8, + "confidence": 0.996 + }, + { + "text": "got", + "start": 47.8, + "end": 48.58, + "confidence": 0.996 + }, + { + "text": "them,", + "start": 48.58, + "end": 50.82, + "confidence": 0.996 + }, + { + "text": "got", + "start": 50.82, + "end": 51.64, + "confidence": 0.996 + }, + { + "text": "them", + "start": 51.64, + "end": 55.0, + "confidence": 0.997 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 72.06, + "end": 85.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, 
+ 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.053046889369263245, + "compression_ratio": 29.52, + "no_speech_prob": 0.24410122632980347, + "confidence": 0.948, + "words": [ + { + "text": "got", + "start": 72.06, + "end": 72.1, + "confidence": 0.214 + }, + { + "text": "them,", + "start": 72.1, + "end": 72.14, + "confidence": 0.95 + }, + { + "text": "got", + "start": 72.14, + "end": 72.18, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 72.18, + "end": 72.22, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.22, + "end": 72.26, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 72.26, + "end": 72.3, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.3, + "end": 72.34, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 72.34, + 
"end": 72.38, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.38, + "end": 72.42, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 72.42, + "end": 72.46, + "confidence": 0.993 + }, + { + "text": "got", + "start": 72.46, + "end": 72.5, + "confidence": 0.912 + }, + { + "text": "them,", + "start": 72.5, + "end": 72.54, + "confidence": 0.988 + }, + { + "text": "got", + "start": 72.54, + "end": 72.58, + "confidence": 0.873 + }, + { + "text": "them,", + "start": 72.58, + "end": 72.62, + "confidence": 0.982 + }, + { + "text": "got", + "start": 72.62, + "end": 72.66, + "confidence": 0.863 + }, + { + "text": "them,", + "start": 72.66, + "end": 72.7, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.7, + "end": 72.74, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 72.74, + "end": 72.78, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.78, + "end": 72.82, + "confidence": 0.852 + }, + { + "text": "them,", + "start": 72.82, + "end": 72.86, + "confidence": 0.925 + }, + { + "text": "got", + "start": 72.86, + "end": 72.9, + "confidence": 0.56 + }, + { + "text": "them,", + "start": 72.9, + "end": 72.94, + "confidence": 0.958 + }, + { + "text": "got", + "start": 72.94, + "end": 72.98, + "confidence": 0.81 + }, + { + "text": "them,", + "start": 72.98, + "end": 73.02, + "confidence": 0.976 + }, + { + "text": "got", + "start": 73.02, + "end": 73.06, + "confidence": 0.843 + }, + { + "text": "them,", + "start": 73.06, + "end": 73.1, + "confidence": 0.977 + }, + { + "text": "got", + "start": 73.1, + "end": 73.14, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 73.14, + "end": 73.18, + "confidence": 0.977 + }, + { + "text": "got", + "start": 73.18, + "end": 73.22, + "confidence": 0.836 + }, + { + "text": "them,", + "start": 73.22, + "end": 73.26, + "confidence": 0.979 + }, + { + "text": "got", + "start": 73.26, + "end": 73.3, + "confidence": 0.853 + }, + { + "text": "them,", + "start": 73.3, + "end": 73.34, + 
"confidence": 0.983 + }, + { + "text": "got", + "start": 73.34, + "end": 73.38, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 73.38, + "end": 73.42, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.42, + "end": 73.46, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 73.46, + "end": 73.5, + "confidence": 0.985 + }, + { + "text": "got", + "start": 73.5, + "end": 73.54, + "confidence": 0.885 + }, + { + "text": "them,", + "start": 73.54, + "end": 73.58, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.58, + "end": 73.62, + "confidence": 0.877 + }, + { + "text": "them,", + "start": 73.62, + "end": 73.66, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.66, + "end": 73.7, + "confidence": 0.873 + }, + { + "text": "them,", + "start": 73.7, + "end": 73.74, + "confidence": 0.987 + }, + { + "text": "got", + "start": 73.74, + "end": 73.78, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 73.78, + "end": 73.82, + "confidence": 0.987 + }, + { + "text": "got", + "start": 73.82, + "end": 73.86, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 73.86, + "end": 73.9, + "confidence": 0.988 + }, + { + "text": "got", + "start": 73.9, + "end": 73.94, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 73.94, + "end": 73.98, + "confidence": 0.989 + }, + { + "text": "got", + "start": 73.98, + "end": 74.02, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 74.02, + "end": 74.06, + "confidence": 0.989 + }, + { + "text": "got", + "start": 74.06, + "end": 74.1, + "confidence": 0.895 + }, + { + "text": "them,", + "start": 74.1, + "end": 74.14, + "confidence": 0.99 + }, + { + "text": "got", + "start": 74.14, + "end": 74.18, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 74.18, + "end": 74.22, + "confidence": 0.99 + }, + { + "text": "got", + "start": 74.22, + "end": 74.26, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 74.26, + "end": 74.3, + "confidence": 0.991 
+ }, + { + "text": "got", + "start": 74.3, + "end": 74.34, + "confidence": 0.911 + }, + { + "text": "them,", + "start": 74.34, + "end": 74.38, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.38, + "end": 74.42, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 74.42, + "end": 74.46, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.46, + "end": 74.5, + "confidence": 0.919 + }, + { + "text": "them,", + "start": 74.5, + "end": 74.54, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.54, + "end": 74.58, + "confidence": 0.922 + }, + { + "text": "them,", + "start": 74.58, + "end": 74.62, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.62, + "end": 74.66, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 74.66, + "end": 74.7, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.7, + "end": 74.74, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 74.74, + "end": 74.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.78, + "end": 74.82, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 74.82, + "end": 74.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.86, + "end": 74.9, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 74.9, + "end": 74.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.94, + "end": 74.98, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 74.98, + "end": 75.02, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.02, + "end": 75.06, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 75.06, + "end": 75.1, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.1, + "end": 75.14, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 75.14, + "end": 75.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.18, + "end": 75.22, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 75.22, + "end": 75.26, + "confidence": 0.992 + }, + { + "text": 
"got", + "start": 75.26, + "end": 75.3, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 75.3, + "end": 75.34, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.34, + "end": 75.38, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 75.38, + "end": 75.42, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.42, + "end": 75.46, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 75.46, + "end": 75.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.5, + "end": 75.54, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 75.54, + "end": 75.58, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.58, + "end": 75.62, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 75.62, + "end": 75.66, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.66, + "end": 75.7, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 75.7, + "end": 75.74, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.74, + "end": 75.78, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.78, + "end": 75.82, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.82, + "end": 75.86, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.86, + "end": 75.9, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.9, + "end": 75.94, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 75.94, + "end": 75.98, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.98, + "end": 76.02, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 76.02, + "end": 76.06, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.06, + "end": 76.1, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 76.1, + "end": 76.14, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.14, + "end": 76.18, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 76.18, + "end": 76.22, + "confidence": 0.992 + }, + { + "text": "got", + "start": 
76.22, + "end": 76.26, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.26, + "end": 76.3, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.3, + "end": 76.34, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.34, + "end": 76.38, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.38, + "end": 76.42, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.42, + "end": 76.46, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.46, + "end": 76.5, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.5, + "end": 76.54, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.54, + "end": 76.58, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.58, + "end": 76.62, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.62, + "end": 76.66, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.66, + "end": 76.7, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.7, + "end": 76.74, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 76.74, + "end": 76.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.78, + "end": 76.82, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.82, + "end": 76.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.86, + "end": 76.9, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.9, + "end": 76.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.94, + "end": 76.98, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 76.98, + "end": 77.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.02, + "end": 77.06, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.06, + "end": 77.1, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.1, + "end": 77.14, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.14, + "end": 77.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.18, + "end": 
77.22, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.22, + "end": 77.26, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.26, + "end": 77.3, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.3, + "end": 77.34, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.34, + "end": 77.38, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.38, + "end": 77.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.42, + "end": 77.46, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.46, + "end": 77.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.5, + "end": 77.54, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.54, + "end": 77.58, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.58, + "end": 77.62, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.62, + "end": 77.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.66, + "end": 77.7, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.7, + "end": 77.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.74, + "end": 77.78, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 77.78, + "end": 77.82, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.82, + "end": 77.86, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 77.86, + "end": 79.56, + "confidence": 0.994 + }, + { + "text": "got", + "start": 79.56, + "end": 79.6, + "confidence": 0.981 + }, + { + "text": "them", + "start": 79.6, + "end": 85.0, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.02, + "end": 115.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got 
them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.04965524716227578, + 
"compression_ratio": 29.52, + "no_speech_prob": 0.6971923112869263, + "confidence": 0.948, + "words": [ + { + "text": "got", + "start": 85.02, + "end": 85.46, + "confidence": 0.438 + }, + { + "text": "them,", + "start": 85.46, + "end": 86.26, + "confidence": 0.943 + }, + { + "text": "got", + "start": 86.26, + "end": 86.78, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 86.78, + "end": 87.18, + "confidence": 0.992 + }, + { + "text": "got", + "start": 87.18, + "end": 87.8, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 87.8, + "end": 87.84, + "confidence": 0.993 + }, + { + "text": "got", + "start": 87.84, + "end": 88.6, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 88.6, + "end": 88.64, + "confidence": 0.991 + }, + { + "text": "got", + "start": 88.64, + "end": 88.9, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 88.9, + "end": 88.94, + "confidence": 0.988 + }, + { + "text": "got", + "start": 88.94, + "end": 88.98, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 88.98, + "end": 89.02, + "confidence": 0.987 + }, + { + "text": "got", + "start": 89.02, + "end": 89.06, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 89.06, + "end": 89.1, + "confidence": 0.985 + }, + { + "text": "got", + "start": 89.1, + "end": 89.14, + "confidence": 0.899 + }, + { + "text": "them,", + "start": 89.14, + "end": 89.18, + "confidence": 0.985 + }, + { + "text": "got", + "start": 89.18, + "end": 89.22, + "confidence": 0.907 + }, + { + "text": "them,", + "start": 89.22, + "end": 89.26, + "confidence": 0.982 + }, + { + "text": "got", + "start": 89.26, + "end": 89.3, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 89.3, + "end": 89.34, + "confidence": 0.971 + }, + { + "text": "got", + "start": 89.34, + "end": 89.38, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 89.38, + "end": 89.42, + "confidence": 0.959 + }, + { + "text": "got", + "start": 89.42, + "end": 89.46, + "confidence": 0.845 + }, 
+ { + "text": "them,", + "start": 89.46, + "end": 89.5, + "confidence": 0.96 + }, + { + "text": "got", + "start": 89.5, + "end": 89.54, + "confidence": 0.861 + }, + { + "text": "them,", + "start": 89.54, + "end": 89.58, + "confidence": 0.963 + }, + { + "text": "got", + "start": 89.58, + "end": 89.62, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 89.62, + "end": 89.66, + "confidence": 0.965 + }, + { + "text": "got", + "start": 89.66, + "end": 89.7, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 89.7, + "end": 89.74, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.74, + "end": 89.78, + "confidence": 0.887 + }, + { + "text": "them,", + "start": 89.78, + "end": 89.82, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.82, + "end": 89.86, + "confidence": 0.875 + }, + { + "text": "them,", + "start": 89.86, + "end": 89.9, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.9, + "end": 89.94, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 89.94, + "end": 89.98, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.98, + "end": 90.02, + "confidence": 0.868 + }, + { + "text": "them,", + "start": 90.02, + "end": 90.06, + "confidence": 0.966 + }, + { + "text": "got", + "start": 90.06, + "end": 90.1, + "confidence": 0.865 + }, + { + "text": "them,", + "start": 90.1, + "end": 90.14, + "confidence": 0.964 + }, + { + "text": "got", + "start": 90.14, + "end": 90.18, + "confidence": 0.865 + }, + { + "text": "them,", + "start": 90.18, + "end": 90.22, + "confidence": 0.965 + }, + { + "text": "got", + "start": 90.22, + "end": 90.26, + "confidence": 0.86 + }, + { + "text": "them,", + "start": 90.26, + "end": 90.3, + "confidence": 0.965 + }, + { + "text": "got", + "start": 90.3, + "end": 90.34, + "confidence": 0.867 + }, + { + "text": "them,", + "start": 90.34, + "end": 90.38, + "confidence": 0.966 + }, + { + "text": "got", + "start": 90.38, + "end": 90.42, + "confidence": 0.866 + }, + { + "text": 
"them,", + "start": 90.42, + "end": 90.46, + "confidence": 0.967 + }, + { + "text": "got", + "start": 90.46, + "end": 90.5, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 90.5, + "end": 90.54, + "confidence": 0.968 + }, + { + "text": "got", + "start": 90.54, + "end": 90.58, + "confidence": 0.87 + }, + { + "text": "them,", + "start": 90.58, + "end": 90.62, + "confidence": 0.969 + }, + { + "text": "got", + "start": 90.62, + "end": 90.66, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 90.66, + "end": 90.7, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.7, + "end": 90.74, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 90.74, + "end": 90.78, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.78, + "end": 90.82, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 90.82, + "end": 90.86, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.86, + "end": 90.9, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 90.9, + "end": 90.94, + "confidence": 0.973 + }, + { + "text": "got", + "start": 90.94, + "end": 90.98, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 90.98, + "end": 91.02, + "confidence": 0.973 + }, + { + "text": "got", + "start": 91.02, + "end": 91.06, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 91.06, + "end": 91.1, + "confidence": 0.973 + }, + { + "text": "got", + "start": 91.1, + "end": 91.14, + "confidence": 0.893 + }, + { + "text": "them,", + "start": 91.14, + "end": 91.18, + "confidence": 0.975 + }, + { + "text": "got", + "start": 91.18, + "end": 91.22, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 91.22, + "end": 91.26, + "confidence": 0.975 + }, + { + "text": "got", + "start": 91.26, + "end": 91.3, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 91.3, + "end": 91.34, + "confidence": 0.976 + }, + { + "text": "got", + "start": 91.34, + "end": 91.38, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 
91.38, + "end": 91.42, + "confidence": 0.977 + }, + { + "text": "got", + "start": 91.42, + "end": 91.46, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 91.46, + "end": 91.5, + "confidence": 0.978 + }, + { + "text": "got", + "start": 91.5, + "end": 91.54, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 91.54, + "end": 91.58, + "confidence": 0.979 + }, + { + "text": "got", + "start": 91.58, + "end": 91.62, + "confidence": 0.92 + }, + { + "text": "them,", + "start": 91.62, + "end": 91.66, + "confidence": 0.98 + }, + { + "text": "got", + "start": 91.66, + "end": 91.7, + "confidence": 0.922 + }, + { + "text": "them,", + "start": 91.7, + "end": 91.74, + "confidence": 0.981 + }, + { + "text": "got", + "start": 91.74, + "end": 91.78, + "confidence": 0.925 + }, + { + "text": "them,", + "start": 91.78, + "end": 91.82, + "confidence": 0.982 + }, + { + "text": "got", + "start": 91.82, + "end": 91.86, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 91.86, + "end": 91.9, + "confidence": 0.983 + }, + { + "text": "got", + "start": 91.9, + "end": 91.94, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 91.94, + "end": 91.98, + "confidence": 0.984 + }, + { + "text": "got", + "start": 91.98, + "end": 92.02, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 92.02, + "end": 92.06, + "confidence": 0.985 + }, + { + "text": "got", + "start": 92.06, + "end": 92.1, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 92.1, + "end": 92.14, + "confidence": 0.986 + }, + { + "text": "got", + "start": 92.14, + "end": 92.18, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 92.18, + "end": 92.22, + "confidence": 0.986 + }, + { + "text": "got", + "start": 92.22, + "end": 92.26, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 92.26, + "end": 92.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.3, + "end": 92.34, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 92.34, + "end": 
92.38, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.38, + "end": 92.42, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 92.42, + "end": 92.46, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.46, + "end": 92.5, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 92.5, + "end": 92.54, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.54, + "end": 92.58, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 92.58, + "end": 92.62, + "confidence": 0.99 + }, + { + "text": "got", + "start": 92.62, + "end": 92.66, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 92.66, + "end": 92.7, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.7, + "end": 92.74, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 92.74, + "end": 92.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.78, + "end": 92.82, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 92.82, + "end": 92.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.86, + "end": 92.9, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 92.9, + "end": 92.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.94, + "end": 92.98, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 92.98, + "end": 93.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.02, + "end": 93.06, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 93.06, + "end": 93.1, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.1, + "end": 93.14, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 93.14, + "end": 93.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.18, + "end": 93.22, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 93.22, + "end": 93.26, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.26, + "end": 93.3, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 93.3, + "end": 93.34, + 
"confidence": 0.994 + }, + { + "text": "got", + "start": 93.34, + "end": 93.38, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 93.38, + "end": 93.42, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.42, + "end": 93.46, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 93.46, + "end": 93.5, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.5, + "end": 93.54, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 93.54, + "end": 93.58, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.58, + "end": 93.62, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 93.62, + "end": 93.66, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.66, + "end": 93.7, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 93.7, + "end": 93.74, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.74, + "end": 93.78, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 93.78, + "end": 93.82, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.82, + "end": 93.86, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 93.86, + "end": 93.9, + "confidence": 0.996 + }, + { + "text": "got", + "start": 93.9, + "end": 93.94, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 93.94, + "end": 93.98, + "confidence": 0.996 + }, + { + "text": "got", + "start": 93.98, + "end": 94.02, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 94.02, + "end": 94.06, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.06, + "end": 94.1, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 94.1, + "end": 94.14, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.14, + "end": 94.18, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 94.18, + "end": 94.22, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.22, + "end": 94.26, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 94.26, + "end": 94.3, + "confidence": 
0.996 + }, + { + "text": "got", + "start": 94.3, + "end": 94.34, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 94.34, + "end": 94.38, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.38, + "end": 94.42, + "confidence": 0.987 + }, + { + "text": "them", + "start": 94.42, + "end": 115.0, + "confidence": 0.996 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/accurate.tiny_apollo11.mp3.words.json b/tests/expected/corner_cases/accurate.tiny_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..677fc99f601c596a9ccca9fba21d9d291cc405aa --- /dev/null +++ b/tests/expected/corner_cases/accurate.tiny_apollo11.mp3.words.json @@ -0,0 +1,1261 @@ +{ + "text": " I'm all around my business and we got a recommendation for you on your door to the VA We have 18A's Okay, okay, I think I'll just get him or if they like just want to go on the Yeah, now what you want is on having a B1 And you just get the other one on my Now when we're here, we're going to go on the Yeah Okay, we want to hear that This is it, we know we just You can hit that That's how much you've ordered in that So I guess I'm working on my position but it's not Oh my god Okay, no problem No idea No idea Okay Okay", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.6, + "end": 5.36, + "text": " I'm all around my business and we got a recommendation for you on your door to the VA", + "tokens": [ + 50364, + 286, + 478, + 439, + 926, + 452, + 1606, + 293, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 2853, + 281, + 264, + 18527, + 50628 + ], + "temperature": 0.0, + "avg_logprob": -1.1825484400210173, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35393258929252625, + "confidence": 0.324, + "words": [ + { + "text": "I'm", + "start": 0.6, + "end": 0.82, + "confidence": 0.168 + }, + { + "text": "all", + "start": 0.82, + "end": 1.06, + "confidence": 0.082 + }, + { + 
"text": "around", + "start": 1.06, + "end": 1.28, + "confidence": 0.103 + }, + { + "text": "my", + "start": 1.28, + "end": 1.46, + "confidence": 0.128 + }, + { + "text": "business", + "start": 1.46, + "end": 1.72, + "confidence": 0.344 + }, + { + "text": "and", + "start": 1.72, + "end": 1.92, + "confidence": 0.616 + }, + { + "text": "we", + "start": 1.92, + "end": 1.98, + "confidence": 0.939 + }, + { + "text": "got", + "start": 1.98, + "end": 2.18, + "confidence": 0.4 + }, + { + "text": "a", + "start": 2.18, + "end": 2.4, + "confidence": 0.639 + }, + { + "text": "recommendation", + "start": 2.4, + "end": 3.06, + "confidence": 0.601 + }, + { + "text": "for", + "start": 3.06, + "end": 3.5, + "confidence": 0.371 + }, + { + "text": "you", + "start": 3.5, + "end": 3.68, + "confidence": 0.974 + }, + { + "text": "on", + "start": 3.68, + "end": 3.88, + "confidence": 0.804 + }, + { + "text": "your", + "start": 3.88, + "end": 4.24, + "confidence": 0.559 + }, + { + "text": "door", + "start": 4.24, + "end": 4.68, + "confidence": 0.145 + }, + { + "text": "to", + "start": 4.68, + "end": 4.88, + "confidence": 0.182 + }, + { + "text": "the", + "start": 4.88, + "end": 5.08, + "confidence": 0.502 + }, + { + "text": "VA", + "start": 5.08, + "end": 5.36, + "confidence": 0.251 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 5.36, + "end": 6.94, + "text": " We have 18A's", + "tokens": [ + 50628, + 492, + 362, + 2443, + 32, + 311, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -1.1825484400210173, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35393258929252625, + "confidence": 0.175, + "words": [ + { + "text": "We", + "start": 5.36, + "end": 5.74, + "confidence": 0.042 + }, + { + "text": "have", + "start": 5.74, + "end": 5.76, + "confidence": 0.655 + }, + { + "text": "18A's", + "start": 5.76, + "end": 6.94, + "confidence": 0.182 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.0, + "end": 16.79, + "text": " Okay, okay, I think I'll just get him or if they 
like just want to go on the", + "tokens": [ + 50914, + 1033, + 11, + 1392, + 11, + 286, + 519, + 286, + 603, + 445, + 483, + 796, + 420, + 498, + 436, + 411, + 445, + 528, + 281, + 352, + 322, + 264, + 51197 + ], + "temperature": 0.0, + "avg_logprob": -1.1825484400210173, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35393258929252625, + "confidence": 0.327, + "words": [ + { + "text": "Okay,", + "start": 12.0, + "end": 12.38, + "confidence": 0.662 + }, + { + "text": "okay,", + "start": 12.88, + "end": 13.12, + "confidence": 0.293 + }, + { + "text": "I", + "start": 13.12, + "end": 13.3, + "confidence": 0.208 + }, + { + "text": "think", + "start": 13.3, + "end": 13.58, + "confidence": 0.262 + }, + { + "text": "I'll", + "start": 13.58, + "end": 13.9, + "confidence": 0.199 + }, + { + "text": "just", + "start": 13.9, + "end": 14.1, + "confidence": 0.17 + }, + { + "text": "get", + "start": 14.1, + "end": 14.28, + "confidence": 0.249 + }, + { + "text": "him", + "start": 14.28, + "end": 14.52, + "confidence": 0.154 + }, + { + "text": "or", + "start": 14.52, + "end": 14.74, + "confidence": 0.199 + }, + { + "text": "if", + "start": 14.74, + "end": 14.94, + "confidence": 0.088 + }, + { + "text": "they", + "start": 14.94, + "end": 15.44, + "confidence": 0.605 + }, + { + "text": "like", + "start": 15.44, + "end": 15.74, + "confidence": 0.201 + }, + { + "text": "just", + "start": 15.74, + "end": 15.96, + "confidence": 0.458 + }, + { + "text": "want", + "start": 15.96, + "end": 16.18, + "confidence": 0.766 + }, + { + "text": "to", + "start": 16.18, + "end": 16.36, + "confidence": 0.912 + }, + { + "text": "go", + "start": 16.36, + "end": 16.56, + "confidence": 0.949 + }, + { + "text": "on", + "start": 16.56, + "end": 16.7, + "confidence": 0.564 + }, + { + "text": "the", + "start": 16.7, + "end": 16.79, + "confidence": 0.583 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 16.79, + "end": 19.0, + "text": " Yeah, now what you want is on having a B1", + "tokens": 
[ + 51197, + 865, + 11, + 586, + 437, + 291, + 528, + 307, + 322, + 1419, + 257, + 363, + 16, + 51322 + ], + "temperature": 0.0, + "avg_logprob": -1.1825484400210173, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35393258929252625, + "confidence": 0.471, + "words": [ + { + "text": "Yeah,", + "start": 16.79, + "end": 17.1, + "confidence": 0.28 + }, + { + "text": "now", + "start": 17.18, + "end": 17.3, + "confidence": 0.363 + }, + { + "text": "what", + "start": 17.3, + "end": 17.48, + "confidence": 0.742 + }, + { + "text": "you", + "start": 17.48, + "end": 17.62, + "confidence": 0.932 + }, + { + "text": "want", + "start": 17.62, + "end": 17.84, + "confidence": 0.744 + }, + { + "text": "is", + "start": 17.84, + "end": 17.96, + "confidence": 0.676 + }, + { + "text": "on", + "start": 17.96, + "end": 18.16, + "confidence": 0.328 + }, + { + "text": "having", + "start": 18.16, + "end": 18.44, + "confidence": 0.564 + }, + { + "text": "a", + "start": 18.44, + "end": 18.76, + "confidence": 0.285 + }, + { + "text": "B1", + "start": 18.76, + "end": 19.0, + "confidence": 0.37 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 19.0, + "end": 21.88, + "text": " And you just get the other one on my", + "tokens": [ + 51322, + 400, + 291, + 445, + 483, + 264, + 661, + 472, + 322, + 452, + 51462 + ], + "temperature": 0.0, + "avg_logprob": -1.1825484400210173, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35393258929252625, + "confidence": 0.516, + "words": [ + { + "text": "And", + "start": 19.0, + "end": 20.26, + "confidence": 0.273 + }, + { + "text": "you", + "start": 20.26, + "end": 20.4, + "confidence": 0.847 + }, + { + "text": "just", + "start": 20.4, + "end": 20.66, + "confidence": 0.693 + }, + { + "text": "get", + "start": 20.66, + "end": 20.8, + "confidence": 0.134 + }, + { + "text": "the", + "start": 20.8, + "end": 20.9, + "confidence": 0.655 + }, + { + "text": "other", + "start": 20.9, + "end": 21.08, + "confidence": 0.938 + }, + { + "text": 
"one", + "start": 21.08, + "end": 21.26, + "confidence": 0.976 + }, + { + "text": "on", + "start": 21.26, + "end": 21.66, + "confidence": 0.872 + }, + { + "text": "my", + "start": 21.66, + "end": 21.88, + "confidence": 0.23 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 22.12, + "end": 25.36, + "text": " Now when we're here, we're going to go on the", + "tokens": [ + 51462, + 823, + 562, + 321, + 434, + 510, + 11, + 321, + 434, + 516, + 281, + 352, + 322, + 264, + 51616 + ], + "temperature": 0.0, + "avg_logprob": -1.1825484400210173, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35393258929252625, + "confidence": 0.336, + "words": [ + { + "text": "Now", + "start": 22.12, + "end": 22.8, + "confidence": 0.169 + }, + { + "text": "when", + "start": 22.8, + "end": 23.04, + "confidence": 0.351 + }, + { + "text": "we're", + "start": 23.04, + "end": 23.24, + "confidence": 0.745 + }, + { + "text": "here,", + "start": 23.24, + "end": 23.44, + "confidence": 0.492 + }, + { + "text": "we're", + "start": 23.66, + "end": 23.88, + "confidence": 0.292 + }, + { + "text": "going", + "start": 23.88, + "end": 23.94, + "confidence": 0.291 + }, + { + "text": "to", + "start": 23.94, + "end": 24.38, + "confidence": 0.88 + }, + { + "text": "go", + "start": 24.38, + "end": 24.72, + "confidence": 0.119 + }, + { + "text": "on", + "start": 24.72, + "end": 25.34, + "confidence": 0.134 + }, + { + "text": "the", + "start": 25.34, + "end": 25.36, + "confidence": 0.367 + } + ] + }, + { + "id": 6, + "seek": 3000, + "start": 31.36, + "end": 31.56, + "text": " Yeah", + "tokens": [ + 50364, + 865, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -1.6112356185913086, + "compression_ratio": 0.3333333333333333, + "no_speech_prob": 0.2908710241317749, + "confidence": 0.058, + "words": [ + { + "text": "Yeah", + "start": 31.36, + "end": 31.56, + "confidence": 0.058 + } + ] + }, + { + "id": 7, + "seek": 6000, + "start": 62.7, + "end": 64.14, + "text": " Okay, we want to hear that", + 
"tokens": [ + 50364, + 1033, + 11, + 321, + 528, + 281, + 1568, + 300, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.355, + "words": [ + { + "text": "Okay,", + "start": 62.7, + "end": 62.72, + "confidence": 0.125 + }, + { + "text": "we", + "start": 63.1, + "end": 63.32, + "confidence": 0.238 + }, + { + "text": "want", + "start": 63.32, + "end": 63.62, + "confidence": 0.523 + }, + { + "text": "to", + "start": 63.62, + "end": 63.72, + "confidence": 0.571 + }, + { + "text": "hear", + "start": 63.72, + "end": 63.84, + "confidence": 0.243 + }, + { + "text": "that", + "start": 63.84, + "end": 64.14, + "confidence": 0.92 + } + ] + }, + { + "id": 8, + "seek": 6000, + "start": 64.46, + "end": 66.82, + "text": " This is it, we know we just", + "tokens": [ + 50564, + 639, + 307, + 309, + 11, + 321, + 458, + 321, + 445, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.459, + "words": [ + { + "text": "This", + "start": 64.46, + "end": 64.72, + "confidence": 0.245 + }, + { + "text": "is", + "start": 64.72, + "end": 64.94, + "confidence": 0.663 + }, + { + "text": "it,", + "start": 64.94, + "end": 65.12, + "confidence": 0.243 + }, + { + "text": "we", + "start": 65.22, + "end": 65.42, + "confidence": 0.469 + }, + { + "text": "know", + "start": 65.42, + "end": 65.62, + "confidence": 0.696 + }, + { + "text": "we", + "start": 65.62, + "end": 65.88, + "confidence": 0.826 + }, + { + "text": "just", + "start": 65.88, + "end": 66.82, + "confidence": 0.402 + } + ] + }, + { + "id": 9, + "seek": 6000, + "start": 66.86, + "end": 68.3, + "text": " You can hit that", + "tokens": [ + 50714, + 509, + 393, + 2045, + 300, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + 
"no_speech_prob": 0.10202154517173767, + "confidence": 0.33, + "words": [ + { + "text": "You", + "start": 66.86, + "end": 67.14, + "confidence": 0.245 + }, + { + "text": "can", + "start": 67.14, + "end": 67.34, + "confidence": 0.577 + }, + { + "text": "hit", + "start": 67.34, + "end": 67.52, + "confidence": 0.127 + }, + { + "text": "that", + "start": 67.52, + "end": 68.3, + "confidence": 0.66 + } + ] + }, + { + "id": 10, + "seek": 6000, + "start": 68.32, + "end": 69.46, + "text": " That's how much you've ordered in that", + "tokens": [ + 50764, + 663, + 311, + 577, + 709, + 291, + 600, + 8866, + 294, + 300, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.3, + "words": [ + { + "text": "That's", + "start": 68.32, + "end": 68.58, + "confidence": 0.262 + }, + { + "text": "how", + "start": 68.58, + "end": 68.6, + "confidence": 0.237 + }, + { + "text": "much", + "start": 68.6, + "end": 68.68, + "confidence": 0.974 + }, + { + "text": "you've", + "start": 68.68, + "end": 68.84, + "confidence": 0.414 + }, + { + "text": "ordered", + "start": 68.84, + "end": 69.02, + "confidence": 0.145 + }, + { + "text": "in", + "start": 69.02, + "end": 69.22, + "confidence": 0.202 + }, + { + "text": "that", + "start": 69.22, + "end": 69.46, + "confidence": 0.245 + } + ] + }, + { + "id": 11, + "seek": 6000, + "start": 70.4, + "end": 72.52, + "text": " So I guess I'm working on my position but it's not", + "tokens": [ + 50864, + 407, + 286, + 2041, + 286, + 478, + 1364, + 322, + 452, + 2535, + 457, + 309, + 311, + 406, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.458, + "words": [ + { + "text": "So", + "start": 70.4, + "end": 70.42, + "confidence": 0.914 + }, + { + "text": "I", + "start": 70.42, + "end": 70.54, + "confidence": 0.727 + }, + 
{ + "text": "guess", + "start": 70.54, + "end": 70.8, + "confidence": 0.77 + }, + { + "text": "I'm", + "start": 70.8, + "end": 71.3, + "confidence": 0.363 + }, + { + "text": "working", + "start": 71.3, + "end": 71.32, + "confidence": 0.655 + }, + { + "text": "on", + "start": 71.32, + "end": 71.58, + "confidence": 0.394 + }, + { + "text": "my", + "start": 71.58, + "end": 71.74, + "confidence": 0.261 + }, + { + "text": "position", + "start": 71.74, + "end": 71.98, + "confidence": 0.08 + }, + { + "text": "but", + "start": 71.98, + "end": 72.18, + "confidence": 0.291 + }, + { + "text": "it's", + "start": 72.18, + "end": 72.38, + "confidence": 0.792 + }, + { + "text": "not", + "start": 72.38, + "end": 72.52, + "confidence": 0.586 + } + ] + }, + { + "id": 12, + "seek": 6000, + "start": 74.72, + "end": 75.22, + "text": " Oh my god", + "tokens": [ + 51014, + 876, + 452, + 3044, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.198, + "words": [ + { + "text": "Oh", + "start": 74.72, + "end": 74.74, + "confidence": 0.145 + }, + { + "text": "my", + "start": 74.74, + "end": 75.08, + "confidence": 0.271 + }, + { + "text": "god", + "start": 75.08, + "end": 75.22, + "confidence": 0.198 + } + ] + }, + { + "id": 13, + "seek": 6000, + "start": 75.22, + "end": 76.64, + "text": " Okay, no problem", + "tokens": [ + 51114, + 1033, + 11, + 572, + 1154, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.623, + "words": [ + { + "text": "Okay,", + "start": 75.22, + "end": 75.9, + "confidence": 0.684 + }, + { + "text": "no", + "start": 76.26, + "end": 76.32, + "confidence": 0.365 + }, + { + "text": "problem", + "start": 76.32, + "end": 76.64, + "confidence": 0.97 + } + ] + }, + { + "id": 14, + "seek": 6000, + "start": 76.64, + "end": 77.52, + 
"text": " No idea", + "tokens": [ + 51214, + 883, + 1558, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.223, + "words": [ + { + "text": "No", + "start": 76.64, + "end": 77.28, + "confidence": 0.223 + }, + { + "text": "idea", + "start": 77.28, + "end": 77.52, + "confidence": 0.224 + } + ] + }, + { + "id": 15, + "seek": 6000, + "start": 78.06, + "end": 78.6, + "text": " No idea", + "tokens": [ + 51264, + 883, + 1558, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.281, + "words": [ + { + "text": "No", + "start": 78.06, + "end": 78.38, + "confidence": 0.104 + }, + { + "text": "idea", + "start": 78.38, + "end": 78.6, + "confidence": 0.758 + } + ] + }, + { + "id": 16, + "seek": 6000, + "start": 85.28, + "end": 85.58, + "text": " Okay", + "tokens": [ + 51614, + 1033, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.102, + "words": [ + { + "text": "Okay", + "start": 85.28, + "end": 85.58, + "confidence": 0.102 + } + ] + }, + { + "id": 17, + "seek": 6000, + "start": 86.6, + "end": 86.86, + "text": " Okay", + "tokens": [ + 51664, + 1033, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.9396333570604201, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10202154517173767, + "confidence": 0.159, + "words": [ + { + "text": "Okay", + "start": 86.6, + "end": 86.86, + "confidence": 0.159 + } + ] + } + ], + "language": "en", + "language_probs": { + "en": 0.9092251062393188, + "zh": 0.006320780608803034, + "de": 0.0015733666950836778, + "es": 0.0061263106763362885, + "ru": 0.0024752530734986067, + "ko": 0.004660651553422213, + "fr": 0.0016618064837530255, + "ja": 0.0030565299093723297, 
+ "pt": 0.0021007198374718428, + "tr": 0.0005610011285170913, + "pl": 0.0002320401690667495, + "ca": 5.730987686547451e-05, + "nl": 0.0019127273699268699, + "ar": 0.0010983869433403015, + "sv": 0.0010983869433403015, + "it": 0.00023940589744597673, + "id": 0.001178397098556161, + "hi": 0.00014865050616208464, + "fi": 0.0010079365456476808, + "vi": 0.005534660536795855, + "he": 0.00017109568580053747, + "uk": 0.0002933262730948627, + "el": 0.0003402644069865346, + "ms": 0.0010079365456476808, + "cs": 4.463297955226153e-05, + "ro": 0.00020799945923499763, + "da": 0.0010983869433403015, + "hu": 0.00016713225340936333, + "ta": 0.0002649981761351228, + "no": 8.876327046891674e-05, + "th": 0.001768983551301062, + "ur": 0.0001607295125722885, + "hr": 3.6144887417322025e-05, + "bg": 3.87777981813997e-05, + "lt": 1.9651633920148015e-05, + "la": 0.0020681514870375395, + "mi": 0.0017828581621870399, + "ml": 0.0014325665542855859, + "cy": 0.002436877926811576, + "sk": 8.65249694470549e-06, + "te": 9.016109106596559e-05, + "fa": 7.18826922820881e-05, + "lv": 2.4266530090244487e-05, + "bn": 8.01909263827838e-05, + "sr": 1.182656160381157e-05, + "az": 8.419111509283539e-06, + "sl": 3.1897754524834454e-05, + "kn": 5.086541023047175e-06, + "et": 1.0559930160525255e-05, + "mk": 5.227544988883892e-06, + "br": 0.0005698355962522328, + "eu": 1.7207545170094818e-05, + "is": 3.700203524203971e-05, + "hy": 1.7207545170094818e-05, + "ne": 4.5335844333749264e-05, + "mn": 0.00018644951342139393, + "bs": 2.793060411931947e-05, + "kk": 4.524062660493655e-06, + "sq": 2.4841992853907868e-05, + "sw": 9.597598545951769e-05, + "gl": 0.00016844308993313462, + "mr": 1.8751719835563563e-05, + "pa": 6.159553322504507e-06, + "si": 0.00013429454702418298, + "km": 0.0008035970968194306, + "sn": 0.00027555451379157603, + "yo": 0.00018355887732468545, + "so": 4.20045034843497e-06, + "af": 8.01909263827838e-05, + "oc": 3.1897754524834454e-05, + "ka": 8.192018867703155e-06, + "be": 1.7207545170094818e-05, + 
"tg": 1.2634811241696298e-07, + "sd": 3.0916358809918165e-05, + "gu": 3.482295596768381e-06, + "am": 1.168877770396648e-05, + "yi": 0.00014981639105826616, + "lo": 2.8592958187800832e-05, + "uz": 1.965909213197392e-09, + "fo": 0.0004507770645432174, + "ht": 0.00015578439342789352, + "ps": 9.282774954044726e-06, + "tk": 1.764214729860214e-08, + "nn": 0.015162764117121696, + "mt": 1.2394129953463562e-05, + "sa": 9.448800847167149e-05, + "lb": 1.368819511071706e-07, + "my": 0.00024317600764334202, + "bo": 0.0002843015536200255, + "tl": 0.0014665389899164438, + "mg": 3.6058203534139466e-08, + "as": 1.2589309335453436e-05, + "tt": 1.3741770032993372e-07, + "haw": 0.010585315525531769, + "ln": 9.958963346434757e-06, + "ha": 1.6608144903784705e-07, + "ba": 1.786754566523996e-08, + "jw": 0.004482104443013668, + "su": 1.1063409743883312e-07 + } +} \ No newline at end of file diff --git a/tests/expected/corner_cases/arabic.mp3.words.json b/tests/expected/corner_cases/arabic.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f88ce5e8e84ed6fbfd0acd935f21544d17bd4c95 --- /dev/null +++ b/tests/expected/corner_cases/arabic.mp3.words.json @@ -0,0 +1,393 @@ +{ + "text": " حبّي أنصار الهدى حبّي ركب الفدا وطلّبي سأر الشاهد وطلّبي سأر الشاهد لمتى سنضل رقودا نغرق في النوم ونشخر وعن الآذان نسمّه ونغمّض كي لا نمسر", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.8, + "end": 7.48, + "text": " حبّي أنصار الهدى", + "tokens": [ + 50364, + 11331, + 3555, + 11703, + 1829, + 14739, + 9381, + 9640, + 2423, + 3224, + 3215, + 7578, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.5114130362486228, + "compression_ratio": 1.1842105263157894, + "no_speech_prob": 0.7993651032447815, + "confidence": 0.651, + "words": [ + { + "text": "حبّي", + "start": 1.8, + "end": 4.26, + "confidence": 0.436 + }, + { + "text": "أنصار", + "start": 4.26, + "end": 5.7, + "confidence": 0.795 + }, + { + "text": "الهدى", + "start": 5.7, + "end": 7.48, + "confidence": 0.837 + 
} + ] + }, + { + "id": 1, + "seek": 0, + "start": 9.24, + "end": 15.2, + "text": " حبّي ركب الفدا", + "tokens": [ + 50714, + 11331, + 3555, + 11703, + 1829, + 12602, + 4117, + 3555, + 27188, + 28259, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.5114130362486228, + "compression_ratio": 1.1842105263157894, + "no_speech_prob": 0.7993651032447815, + "confidence": 0.829, + "words": [ + { + "text": "حبّي", + "start": 9.24, + "end": 12.58, + "confidence": 0.875 + }, + { + "text": "ركب", + "start": 12.58, + "end": 13.76, + "confidence": 0.853 + }, + { + "text": "الفدا", + "start": 13.76, + "end": 15.2, + "confidence": 0.711 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 18.86, + "end": 23.42, + "text": " وطلّبي سأر الشاهد", + "tokens": [ + 51114, + 4032, + 9566, + 1211, + 11703, + 21292, + 8608, + 10721, + 2288, + 25124, + 40294, + 3215, + 51514 + ], + "temperature": 0.0, + "avg_logprob": -0.5114130362486228, + "compression_ratio": 1.1842105263157894, + "no_speech_prob": 0.7993651032447815, + "confidence": 0.587, + "words": [ + { + "text": "وطلّبي", + "start": 18.86, + "end": 20.68, + "confidence": 0.54 + }, + { + "text": "سأر", + "start": 20.68, + "end": 21.78, + "confidence": 0.94 + }, + { + "text": "الشاهد", + "start": 21.78, + "end": 23.42, + "confidence": 0.421 + } + ] + }, + { + "id": 3, + "seek": 2300, + "start": 24.2, + "end": 28.96, + "text": " وطلّبي سأر الشاهد", + "tokens": [ + 50414, + 4032, + 9566, + 1211, + 11703, + 21292, + 8608, + 10721, + 2288, + 25124, + 40294, + 3215, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.24054829042349288, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.875, + "words": [ + { + "text": "وطلّبي", + "start": 24.2, + "end": 25.98, + "confidence": 0.799 + }, + { + "text": "سأر", + "start": 25.98, + "end": 27.12, + "confidence": 0.95 + }, + { + "text": "الشاهد", + "start": 27.12, + "end": 28.96, + "confidence": 0.936 + } + ] + }, + { + "id": 4, + "seek": 
2300, + "start": 30.88, + "end": 33.34, + "text": " لمتى سنضل رقودا", + "tokens": [ + 50714, + 32767, + 49975, + 8608, + 1863, + 11242, + 1211, + 12602, + 4587, + 23328, + 995, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.24054829042349288, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.714, + "words": [ + { + "text": "لمتى", + "start": 30.88, + "end": 31.48, + "confidence": 0.692 + }, + { + "text": "سنضل", + "start": 31.48, + "end": 32.34, + "confidence": 0.754 + }, + { + "text": "رقودا", + "start": 32.34, + "end": 33.34, + "confidence": 0.686 + } + ] + }, + { + "id": 5, + "seek": 2300, + "start": 33.36, + "end": 35.88, + "text": " نغرق في النوم ونشخر", + "tokens": [ + 50864, + 8717, + 17082, + 2288, + 4587, + 8978, + 28239, + 20498, + 4032, + 1863, + 8592, + 34740, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.24054829042349288, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.926, + "words": [ + { + "text": "نغرق", + "start": 33.36, + "end": 34.22, + "confidence": 0.901 + }, + { + "text": "في", + "start": 34.22, + "end": 34.44, + "confidence": 0.873 + }, + { + "text": "النوم", + "start": 34.44, + "end": 35.04, + "confidence": 0.974 + }, + { + "text": "ونشخر", + "start": 35.04, + "end": 35.88, + "confidence": 0.942 + } + ] + }, + { + "id": 6, + "seek": 2300, + "start": 36.24, + "end": 38.72, + "text": " وعن الآذان نسمّه", + "tokens": [ + 51014, + 4032, + 3615, + 1863, + 6024, + 95, + 8848, + 7649, + 8717, + 38251, + 11703, + 3224, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.24054829042349288, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.765, + "words": [ + { + "text": "وعن", + "start": 36.24, + "end": 36.76, + "confidence": 0.923 + }, + { + "text": "الآذان", + "start": 36.76, + "end": 37.72, + "confidence": 0.82 + }, + { + "text": "نسمّه", + "start": 37.72, + "end": 
38.72, + "confidence": 0.619 + } + ] + }, + { + "id": 7, + "seek": 2300, + "start": 38.88, + "end": 41.3, + "text": " ونغمّض كي لا نمسر", + "tokens": [ + 51164, + 4032, + 1863, + 17082, + 2304, + 11703, + 11242, + 9122, + 1829, + 20193, + 8717, + 2304, + 3794, + 2288, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.24054829042349288, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.79, + "words": [ + { + "text": "ونغمّض", + "start": 38.88, + "end": 39.9, + "confidence": 0.91 + }, + { + "text": "كي", + "start": 39.9, + "end": 40.28, + "confidence": 0.833 + }, + { + "text": "لا", + "start": 40.28, + "end": 40.56, + "confidence": 0.804 + }, + { + "text": "نمسر", + "start": 40.56, + "end": 41.3, + "confidence": 0.619 + } + ] + } + ], + "language": "Arabic" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/issue24_empty.wav.words.json b/tests/expected/corner_cases/issue24_empty.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..d1fdebbdbb7db9cccd1618af253d1759eb610473 --- /dev/null +++ b/tests/expected/corner_cases/issue24_empty.wav.words.json @@ -0,0 +1,992 @@ +{ + "text": " I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. I'm sorry. 
I'm sorry.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 0.96, + "text": " I'm sorry.", + "tokens": [ + 50363, + 314, + 1101, + 7926, + 13, + 50413 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.102, + "words": [ + { + "text": "I'm", + "start": 0.0, + "end": 0.94, + "confidence": 0.105 + }, + { + "text": "sorry", + "start": 0.94, + "end": 0.96, + "confidence": 0.097 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 0.96, + "end": 2.38, + "text": " I'm sorry.", + "tokens": [ + 50413, + 314, + 1101, + 7926, + 13, + 50463 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.406, + "words": [ + { + "text": "I'm", + "start": 0.96, + "end": 2.26, + "confidence": 0.355 + }, + { + "text": "sorry", + "start": 2.26, + "end": 2.38, + "confidence": 0.534 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 2.38, + "end": 2.42, + "text": " I'm sorry.", + "tokens": [ + 50463, + 314, + 1101, + 7926, + 13, + 50513 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.612, + "words": [ + { + "text": "I'm", + "start": 2.38, + "end": 2.4, + "confidence": 0.552 + }, + { + "text": "sorry", + "start": 2.4, + "end": 2.42, + "confidence": 0.753 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 3.0, + "end": 3.24, + "text": " I'm sorry.", + "tokens": [ + 50513, + 314, + 1101, + 7926, + 13, + 50563 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.765, + "words": [ + { + "text": "I'm", + "start": 3.0, + "end": 3.22, + "confidence": 0.714 + }, + { + "text": "sorry", + "start": 3.22, + "end": 3.24, + "confidence": 0.877 + } + ] + }, + { + "id": 4, + "seek": 0, 
+ "start": 4.36, + "end": 4.48, + "text": " I'm sorry.", + "tokens": [ + 50563, + 314, + 1101, + 7926, + 13, + 50613 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.849, + "words": [ + { + "text": "I'm", + "start": 4.36, + "end": 4.42, + "confidence": 0.804 + }, + { + "text": "sorry", + "start": 4.42, + "end": 4.48, + "confidence": 0.947 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 5.5, + "end": 5.78, + "text": " I'm sorry.", + "tokens": [ + 50613, + 314, + 1101, + 7926, + 13, + 50663 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.89, + "words": [ + { + "text": "I'm", + "start": 5.5, + "end": 5.54, + "confidence": 0.852 + }, + { + "text": "sorry", + "start": 5.54, + "end": 5.78, + "confidence": 0.972 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 5.92, + "end": 6.8, + "text": " I'm sorry.", + "tokens": [ + 50663, + 314, + 1101, + 7926, + 13, + 50713 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.918, + "words": [ + { + "text": "I'm", + "start": 5.92, + "end": 6.14, + "confidence": 0.888 + }, + { + "text": "sorry", + "start": 6.14, + "end": 6.8, + "confidence": 0.983 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 7.5, + "end": 8.5, + "text": " I'm sorry.", + "tokens": [ + 50713, + 314, + 1101, + 7926, + 13, + 50763 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.941, + "words": [ + { + "text": "I'm", + "start": 7.5, + "end": 8.48, + "confidence": 0.919 + }, + { + "text": "sorry", + "start": 8.48, + "end": 8.5, + "confidence": 0.988 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 8.56, + "end": 9.48, + "text": " I'm sorry.", + 
"tokens": [ + 50763, + 314, + 1101, + 7926, + 13, + 50813 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.95, + "words": [ + { + "text": "I'm", + "start": 8.56, + "end": 9.0, + "confidence": 0.93 + }, + { + "text": "sorry", + "start": 9.0, + "end": 9.48, + "confidence": 0.991 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 9.54, + "end": 10.38, + "text": " I'm sorry.", + "tokens": [ + 50813, + 314, + 1101, + 7926, + 13, + 50863 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.963, + "words": [ + { + "text": "I'm", + "start": 9.54, + "end": 9.56, + "confidence": 0.948 + }, + { + "text": "sorry", + "start": 9.56, + "end": 10.38, + "confidence": 0.993 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 10.38, + "end": 11.36, + "text": " I'm sorry.", + "tokens": [ + 50863, + 314, + 1101, + 7926, + 13, + 50913 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.968, + "words": [ + { + "text": "I'm", + "start": 10.38, + "end": 11.34, + "confidence": 0.955 + }, + { + "text": "sorry", + "start": 11.34, + "end": 11.36, + "confidence": 0.994 + } + ] + }, + { + "id": 11, + "seek": 0, + "start": 11.36, + "end": 12.48, + "text": " I'm sorry.", + "tokens": [ + 50913, + 314, + 1101, + 7926, + 13, + 50963 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.977, + "words": [ + { + "text": "I'm", + "start": 11.36, + "end": 12.06, + "confidence": 0.969 + }, + { + "text": "sorry", + "start": 12.06, + "end": 12.48, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 0, + "start": 12.54, + "end": 12.93, + "text": " I'm sorry.", + "tokens": [ + 50963, + 314, + 1101, 
+ 7926, + 13, + 51013 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.979, + "words": [ + { + "text": "I'm", + "start": 12.54, + "end": 12.56, + "confidence": 0.971 + }, + { + "text": "sorry", + "start": 12.56, + "end": 12.93, + "confidence": 0.996 + } + ] + }, + { + "id": 13, + "seek": 0, + "start": 12.93, + "end": 14.5, + "text": " I'm sorry.", + "tokens": [ + 51013, + 314, + 1101, + 7926, + 13, + 51063 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.979, + "words": [ + { + "text": "I'm", + "start": 12.93, + "end": 14.48, + "confidence": 0.971 + }, + { + "text": "sorry", + "start": 14.48, + "end": 14.5, + "confidence": 0.996 + } + ] + }, + { + "id": 14, + "seek": 0, + "start": 14.5, + "end": 15.46, + "text": " I'm sorry.", + "tokens": [ + 51063, + 314, + 1101, + 7926, + 13, + 51113 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.982, + "words": [ + { + "text": "I'm", + "start": 14.5, + "end": 15.42, + "confidence": 0.974 + }, + { + "text": "sorry", + "start": 15.42, + "end": 15.46, + "confidence": 0.996 + } + ] + }, + { + "id": 15, + "seek": 0, + "start": 15.46, + "end": 16.48, + "text": " I'm sorry.", + "tokens": [ + 51113, + 314, + 1101, + 7926, + 13, + 51163 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.983, + "words": [ + { + "text": "I'm", + "start": 15.46, + "end": 16.38, + "confidence": 0.976 + }, + { + "text": "sorry", + "start": 16.38, + "end": 16.48, + "confidence": 0.997 + } + ] + }, + { + "id": 16, + "seek": 0, + "start": 16.48, + "end": 17.5, + "text": " I'm sorry.", + "tokens": [ + 51163, + 314, + 1101, + 7926, + 13, + 51213 + ], + 
"temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.982, + "words": [ + { + "text": "I'm", + "start": 16.48, + "end": 17.48, + "confidence": 0.974 + }, + { + "text": "sorry", + "start": 17.48, + "end": 17.5, + "confidence": 0.997 + } + ] + }, + { + "id": 17, + "seek": 0, + "start": 17.54, + "end": 18.5, + "text": " I'm sorry.", + "tokens": [ + 51213, + 314, + 1101, + 7926, + 13, + 51263 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.984, + "words": [ + { + "text": "I'm", + "start": 17.54, + "end": 18.48, + "confidence": 0.978 + }, + { + "text": "sorry", + "start": 18.48, + "end": 18.5, + "confidence": 0.997 + } + ] + }, + { + "id": 18, + "seek": 0, + "start": 18.5, + "end": 19.48, + "text": " I'm sorry.", + "tokens": [ + 51263, + 314, + 1101, + 7926, + 13, + 51313 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.984, + "words": [ + { + "text": "I'm", + "start": 18.5, + "end": 19.34, + "confidence": 0.978 + }, + { + "text": "sorry", + "start": 19.34, + "end": 19.48, + "confidence": 0.997 + } + ] + }, + { + "id": 19, + "seek": 0, + "start": 19.64, + "end": 20.32, + "text": " I'm sorry.", + "tokens": [ + 51313, + 314, + 1101, + 7926, + 13, + 51363 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.986, + "words": [ + { + "text": "I'm", + "start": 19.64, + "end": 19.68, + "confidence": 0.98 + }, + { + "text": "sorry", + "start": 19.68, + "end": 20.32, + "confidence": 0.997 + } + ] + }, + { + "id": 20, + "seek": 0, + "start": 20.32, + "end": 20.36, + "text": " I'm sorry.", + "tokens": [ + 51363, + 314, + 1101, + 7926, + 13, + 51413 + ], + "temperature": 0.0, + 
"avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.986, + "words": [ + { + "text": "I'm", + "start": 20.32, + "end": 20.34, + "confidence": 0.98 + }, + { + "text": "sorry", + "start": 20.34, + "end": 20.36, + "confidence": 0.997 + } + ] + }, + { + "id": 21, + "seek": 0, + "start": 21.58, + "end": 22.5, + "text": " I'm sorry.", + "tokens": [ + 51413, + 314, + 1101, + 7926, + 13, + 51463 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.988, + "words": [ + { + "text": "I'm", + "start": 21.58, + "end": 22.48, + "confidence": 0.983 + }, + { + "text": "sorry", + "start": 22.48, + "end": 22.5, + "confidence": 0.997 + } + ] + }, + { + "id": 22, + "seek": 0, + "start": 22.58, + "end": 23.46, + "text": " I'm sorry.", + "tokens": [ + 51463, + 314, + 1101, + 7926, + 13, + 51513 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.987, + "words": [ + { + "text": "I'm", + "start": 22.58, + "end": 22.6, + "confidence": 0.982 + }, + { + "text": "sorry", + "start": 22.6, + "end": 23.46, + "confidence": 0.997 + } + ] + }, + { + "id": 23, + "seek": 0, + "start": 23.46, + "end": 23.7, + "text": " I'm sorry.", + "tokens": [ + 51513, + 314, + 1101, + 7926, + 13, + 51563 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.988, + "words": [ + { + "text": "I'm", + "start": 23.46, + "end": 23.48, + "confidence": 0.984 + }, + { + "text": "sorry", + "start": 23.48, + "end": 23.7, + "confidence": 0.997 + } + ] + }, + { + "id": 24, + "seek": 0, + "start": 24.5, + "end": 24.54, + "text": " I'm sorry.", + "tokens": [ + 51563, + 314, + 1101, + 7926, + 13, + 51613 + ], + "temperature": 0.0, + "avg_logprob": 
-0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.988, + "words": [ + { + "text": "I'm", + "start": 24.5, + "end": 24.52, + "confidence": 0.984 + }, + { + "text": "sorry", + "start": 24.52, + "end": 24.54, + "confidence": 0.998 + } + ] + }, + { + "id": 25, + "seek": 0, + "start": 24.54, + "end": 26.48, + "text": " I'm sorry.", + "tokens": [ + 51613, + 314, + 1101, + 7926, + 13, + 51663 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.986, + "words": [ + { + "text": "I'm", + "start": 24.54, + "end": 26.26, + "confidence": 0.98 + }, + { + "text": "sorry", + "start": 26.26, + "end": 26.48, + "confidence": 0.998 + } + ] + }, + { + "id": 26, + "seek": 0, + "start": 26.48, + "end": 27.5, + "text": " I'm sorry.", + "tokens": [ + 51663, + 314, + 1101, + 7926, + 13, + 51713 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.982, + "words": [ + { + "text": "I'm", + "start": 26.48, + "end": 27.48, + "confidence": 0.975 + }, + { + "text": "sorry", + "start": 27.48, + "end": 27.5, + "confidence": 0.998 + } + ] + }, + { + "id": 27, + "seek": 0, + "start": 27.5, + "end": 28.46, + "text": " I'm sorry.", + "tokens": [ + 51713, + 314, + 1101, + 7926, + 13, + 51763 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + "compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.975, + "words": [ + { + "text": "I'm", + "start": 27.5, + "end": 28.44, + "confidence": 0.964 + }, + { + "text": "sorry", + "start": 28.44, + "end": 28.46, + "confidence": 0.998 + } + ] + }, + { + "id": 28, + "seek": 0, + "start": 28.46, + "end": 29.32, + "text": " I'm sorry.", + "tokens": [ + 51763, + 314, + 1101, + 7926, + 13, + 51813 + ], + "temperature": 0.0, + "avg_logprob": -0.12360930442810059, + 
"compression_ratio": 13.25, + "no_speech_prob": 0.7654247283935547, + "confidence": 0.954, + "words": [ + { + "text": "I'm", + "start": 28.46, + "end": 29.3, + "confidence": 0.933 + }, + { + "text": "sorry", + "start": 29.3, + "end": 29.32, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/large-v2.accurate_gloria.mp3.words.json b/tests/expected/corner_cases/large-v2.accurate_gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..0b71495f73f9e8e0ab56f38a9885ea546309faee --- /dev/null +++ b/tests/expected/corner_cases/large-v2.accurate_gloria.mp3.words.json @@ -0,0 +1,540 @@ +{ + "text": " Ella, my glorious love, how are you? Oh, I'm okay. I will be. I said she could stay with us tomorrow just until she feels better. Of course she can. No, this won't be for long. Well, if you can stay as long as you want, my love. I've really missed you. Pops.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 6.28, + "text": " Ella, my glorious love, how are you?", + "tokens": [ + 50364, + 29261, + 11, + 452, + 24026, + 959, + 11, + 577, + 366, + 291, + 30, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.35854845368460325, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429647624492645, + "confidence": 0.452, + "words": [ + { + "text": "Ella,", + "start": 0.0, + "end": 1.68, + "confidence": 0.126 + }, + { + "text": "my", + "start": 1.94, + "end": 2.7, + "confidence": 0.367 + }, + { + "text": "glorious", + "start": 2.7, + "end": 3.5, + "confidence": 0.729 + }, + { + "text": "love,", + "start": 3.5, + "end": 4.18, + "confidence": 0.432 + }, + { + "text": "how", + "start": 4.94, + "end": 5.66, + "confidence": 0.693 + }, + { + "text": "are", + "start": 5.66, + "end": 6.02, + "confidence": 0.902 + }, + { + "text": "you?", + "start": 6.02, + "end": 6.28, + "confidence": 0.845 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 
6.28, + "end": 9.18, + "text": " Oh, I'm okay. I will be.", + "tokens": [ + 50664, + 876, + 11, + 286, + 478, + 1392, + 13, + 286, + 486, + 312, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.35854845368460325, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429647624492645, + "confidence": 0.68, + "words": [ + { + "text": "Oh,", + "start": 6.28, + "end": 6.6, + "confidence": 0.427 + }, + { + "text": "I'm", + "start": 6.68, + "end": 7.02, + "confidence": 0.958 + }, + { + "text": "okay.", + "start": 7.02, + "end": 7.64, + "confidence": 0.604 + }, + { + "text": "I", + "start": 8.3, + "end": 8.58, + "confidence": 0.562 + }, + { + "text": "will", + "start": 8.58, + "end": 8.86, + "confidence": 0.813 + }, + { + "text": "be.", + "start": 8.86, + "end": 9.18, + "confidence": 0.871 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 9.18, + "end": 11.48, + "text": " I said she could stay with us tomorrow just until she feels better.", + "tokens": [ + 50814, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 445, + 1826, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.35854845368460325, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429647624492645, + "confidence": 0.571, + "words": [ + { + "text": "I", + "start": 9.18, + "end": 9.44, + "confidence": 0.66 + }, + { + "text": "said", + "start": 9.44, + "end": 9.62, + "confidence": 0.869 + }, + { + "text": "she", + "start": 9.62, + "end": 9.76, + "confidence": 0.748 + }, + { + "text": "could", + "start": 9.76, + "end": 9.88, + "confidence": 0.813 + }, + { + "text": "stay", + "start": 9.88, + "end": 10.08, + "confidence": 0.925 + }, + { + "text": "with", + "start": 10.08, + "end": 10.22, + "confidence": 0.808 + }, + { + "text": "us", + "start": 10.22, + "end": 10.34, + "confidence": 0.628 + }, + { + "text": "tomorrow", + "start": 10.34, + "end": 10.56, + "confidence": 0.219 + }, + { + "text": "just", + "start": 10.56, + "end": 10.74, 
+ "confidence": 0.067 + }, + { + "text": "until", + "start": 10.74, + "end": 10.86, + "confidence": 0.4 + }, + { + "text": "she", + "start": 10.86, + "end": 10.98, + "confidence": 0.803 + }, + { + "text": "feels", + "start": 10.98, + "end": 11.18, + "confidence": 0.788 + }, + { + "text": "better.", + "start": 11.18, + "end": 11.48, + "confidence": 0.807 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 11.5, + "end": 13.34, + "text": " Of course she can.", + "tokens": [ + 50964, + 2720, + 1164, + 750, + 393, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.35854845368460325, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429647624492645, + "confidence": 0.333, + "words": [ + { + "text": "Of", + "start": 11.5, + "end": 12.1, + "confidence": 0.124 + }, + { + "text": "course", + "start": 12.1, + "end": 12.6, + "confidence": 0.86 + }, + { + "text": "she", + "start": 12.6, + "end": 12.88, + "confidence": 0.085 + }, + { + "text": "can.", + "start": 12.88, + "end": 13.34, + "confidence": 0.674 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.34, + "end": 15.2, + "text": " No, this won't be for long.", + "tokens": [ + 51014, + 883, + 11, + 341, + 1582, + 380, + 312, + 337, + 938, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.35854845368460325, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429647624492645, + "confidence": 0.65, + "words": [ + { + "text": "No,", + "start": 13.34, + "end": 13.78, + "confidence": 0.328 + }, + { + "text": "this", + "start": 13.9, + "end": 14.24, + "confidence": 0.419 + }, + { + "text": "won't", + "start": 14.24, + "end": 14.54, + "confidence": 0.917 + }, + { + "text": "be", + "start": 14.54, + "end": 14.68, + "confidence": 0.906 + }, + { + "text": "for", + "start": 14.68, + "end": 14.88, + "confidence": 0.884 + }, + { + "text": "long.", + "start": 14.88, + "end": 15.2, + "confidence": 0.823 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 15.2, + "end": 17.44, + 
"text": " Well, if you can stay as long as you want, my love.", + "tokens": [ + 51114, + 1042, + 11, + 498, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.35854845368460325, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429647624492645, + "confidence": 0.69, + "words": [ + { + "text": "Well,", + "start": 15.2, + "end": 15.46, + "confidence": 0.553 + }, + { + "text": "if", + "start": 15.52, + "end": 15.6, + "confidence": 0.084 + }, + { + "text": "you", + "start": 15.6, + "end": 15.68, + "confidence": 0.898 + }, + { + "text": "can", + "start": 15.68, + "end": 15.8, + "confidence": 0.827 + }, + { + "text": "stay", + "start": 15.8, + "end": 16.04, + "confidence": 0.922 + }, + { + "text": "as", + "start": 16.04, + "end": 16.18, + "confidence": 0.896 + }, + { + "text": "long", + "start": 16.18, + "end": 16.32, + "confidence": 0.893 + }, + { + "text": "as", + "start": 16.32, + "end": 16.46, + "confidence": 0.902 + }, + { + "text": "you", + "start": 16.46, + "end": 16.58, + "confidence": 0.905 + }, + { + "text": "want,", + "start": 16.58, + "end": 16.76, + "confidence": 0.731 + }, + { + "text": "my", + "start": 16.88, + "end": 17.02, + "confidence": 0.901 + }, + { + "text": "love.", + "start": 17.02, + "end": 17.44, + "confidence": 0.831 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 17.44, + "end": 18.96, + "text": " I've really missed you.", + "tokens": [ + 51214, + 286, + 600, + 534, + 6721, + 291, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.35854845368460325, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429647624492645, + "confidence": 0.529, + "words": [ + { + "text": "I've", + "start": 17.44, + "end": 17.58, + "confidence": 0.309 + }, + { + "text": "really", + "start": 17.58, + "end": 18.18, + "confidence": 0.763 + }, + { + "text": "missed", + "start": 18.18, + "end": 18.68, + "confidence": 0.778 + }, + { + "text": 
"you.", + "start": 18.68, + "end": 18.96, + "confidence": 0.622 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.58, + "end": 20.12, + "text": " Pops.", + "tokens": [ + 51314, + 430, + 3370, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.35854845368460325, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429647624492645, + "confidence": 0.02, + "words": [ + { + "text": "Pops.", + "start": 19.58, + "end": 20.12, + "confidence": 0.02 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/large-v2.efficient_gloria.mp3.words.json b/tests/expected/corner_cases/large-v2.efficient_gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..7c12cc458c90e3fc580d164c79add659722afb41 --- /dev/null +++ b/tests/expected/corner_cases/large-v2.efficient_gloria.mp3.words.json @@ -0,0 +1,564 @@ +{ + "text": " Ella, my glorious love. How are you? Oh, I'm okay. I will be. I said she could stay with us tomorrow, just until she feels better. Of course she can. No, this won't be for long. Well, if you can stay as long as you want, my love. I really missed you. 
I agree.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 4.02, + "text": " Ella, my glorious love.", + "tokens": [ + 50364, + 29261, + 11, + 452, + 24026, + 959, + 13, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.3614922408219222, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.24429722130298615, + "confidence": 0.476, + "words": [ + { + "text": "Ella,", + "start": 0.0, + "end": 1.68, + "confidence": 0.127 + }, + { + "text": "my", + "start": 1.92, + "end": 2.1, + "confidence": 0.731 + }, + { + "text": "glorious", + "start": 2.1, + "end": 3.36, + "confidence": 0.625 + }, + { + "text": "love.", + "start": 3.36, + "end": 4.02, + "confidence": 0.881 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 5.46, + "end": 6.24, + "text": " How are you?", + "tokens": [ + 50564, + 1012, + 366, + 291, + 30, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.3614922408219222, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.24429722130298615, + "confidence": 0.682, + "words": [ + { + "text": "How", + "start": 5.46, + "end": 5.62, + "confidence": 0.455 + }, + { + "text": "are", + "start": 5.62, + "end": 6.02, + "confidence": 0.892 + }, + { + "text": "you?", + "start": 6.02, + "end": 6.24, + "confidence": 0.783 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 6.32, + "end": 9.12, + "text": " Oh, I'm okay. 
I will be.", + "tokens": [ + 50664, + 876, + 11, + 286, + 478, + 1392, + 13, + 286, + 486, + 312, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.3614922408219222, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.24429722130298615, + "confidence": 0.729, + "words": [ + { + "text": "Oh,", + "start": 6.32, + "end": 6.66, + "confidence": 0.479 + }, + { + "text": "I'm", + "start": 6.74, + "end": 7.02, + "confidence": 0.971 + }, + { + "text": "okay.", + "start": 7.02, + "end": 7.7, + "confidence": 0.476 + }, + { + "text": "I", + "start": 8.14, + "end": 8.56, + "confidence": 0.711 + }, + { + "text": "will", + "start": 8.56, + "end": 8.82, + "confidence": 0.792 + }, + { + "text": "be.", + "start": 8.82, + "end": 9.12, + "confidence": 0.906 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 9.34, + "end": 11.42, + "text": " I said she could stay with us tomorrow, just until she feels better.", + "tokens": [ + 50814, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 11, + 445, + 1826, + 750, + 3417, + 1101, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.3614922408219222, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.24429722130298615, + "confidence": 0.723, + "words": [ + { + "text": "I", + "start": 9.34, + "end": 9.48, + "confidence": 0.879 + }, + { + "text": "said", + "start": 9.48, + "end": 9.62, + "confidence": 0.862 + }, + { + "text": "she", + "start": 9.62, + "end": 9.76, + "confidence": 0.757 + }, + { + "text": "could", + "start": 9.76, + "end": 9.88, + "confidence": 0.802 + }, + { + "text": "stay", + "start": 9.88, + "end": 10.08, + "confidence": 0.92 + }, + { + "text": "with", + "start": 10.08, + "end": 10.22, + "confidence": 0.811 + }, + { + "text": "us", + "start": 10.22, + "end": 10.36, + "confidence": 0.846 + }, + { + "text": "tomorrow,", + "start": 10.36, + "end": 10.56, + "confidence": 0.514 + }, + { + "text": "just", + "start": 10.74, + "end": 10.76, + "confidence": 0.435 + }, + { + 
"text": "until", + "start": 10.76, + "end": 10.86, + "confidence": 0.436 + }, + { + "text": "she", + "start": 10.86, + "end": 10.98, + "confidence": 0.814 + }, + { + "text": "feels", + "start": 10.98, + "end": 11.16, + "confidence": 0.777 + }, + { + "text": "better.", + "start": 11.16, + "end": 11.42, + "confidence": 0.822 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 11.9, + "end": 13.06, + "text": " Of course she can.", + "tokens": [ + 50914, + 2720, + 1164, + 750, + 393, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.3614922408219222, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.24429722130298615, + "confidence": 0.76, + "words": [ + { + "text": "Of", + "start": 11.9, + "end": 12.16, + "confidence": 0.562 + }, + { + "text": "course", + "start": 12.16, + "end": 12.54, + "confidence": 0.875 + }, + { + "text": "she", + "start": 12.54, + "end": 12.8, + "confidence": 0.753 + }, + { + "text": "can.", + "start": 12.8, + "end": 13.06, + "confidence": 0.899 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 13.7, + "end": 15.2, + "text": " No, this won't be for long.", + "tokens": [ + 51014, + 883, + 11, + 341, + 1582, + 380, + 312, + 337, + 938, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.3614922408219222, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.24429722130298615, + "confidence": 0.773, + "words": [ + { + "text": "No,", + "start": 13.7, + "end": 13.72, + "confidence": 0.377 + }, + { + "text": "this", + "start": 13.92, + "end": 14.26, + "confidence": 0.7 + }, + { + "text": "won't", + "start": 14.26, + "end": 14.54, + "confidence": 0.943 + }, + { + "text": "be", + "start": 14.54, + "end": 14.68, + "confidence": 0.898 + }, + { + "text": "for", + "start": 14.68, + "end": 14.86, + "confidence": 0.891 + }, + { + "text": "long.", + "start": 14.86, + "end": 15.2, + "confidence": 0.877 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 15.3, + "end": 17.48, + "text": " Well, if you can stay as long 
as you want, my love.", + "tokens": [ + 51114, + 1042, + 11, + 498, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.3614922408219222, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.24429722130298615, + "confidence": 0.835, + "words": [ + { + "text": "Well,", + "start": 15.3, + "end": 15.46, + "confidence": 0.646 + }, + { + "text": "if", + "start": 15.52, + "end": 15.6, + "confidence": 0.611 + }, + { + "text": "you", + "start": 15.6, + "end": 15.68, + "confidence": 0.893 + }, + { + "text": "can", + "start": 15.68, + "end": 15.82, + "confidence": 0.85 + }, + { + "text": "stay", + "start": 15.82, + "end": 16.04, + "confidence": 0.922 + }, + { + "text": "as", + "start": 16.04, + "end": 16.2, + "confidence": 0.885 + }, + { + "text": "long", + "start": 16.2, + "end": 16.32, + "confidence": 0.883 + }, + { + "text": "as", + "start": 16.32, + "end": 16.48, + "confidence": 0.907 + }, + { + "text": "you", + "start": 16.48, + "end": 16.58, + "confidence": 0.899 + }, + { + "text": "want,", + "start": 16.58, + "end": 16.76, + "confidence": 0.812 + }, + { + "text": "my", + "start": 16.9, + "end": 17.02, + "confidence": 0.9 + }, + { + "text": "love.", + "start": 17.02, + "end": 17.48, + "confidence": 0.896 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 17.66, + "end": 18.96, + "text": " I really missed you.", + "tokens": [ + 51214, + 286, + 534, + 6721, + 291, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.3614922408219222, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.24429722130298615, + "confidence": 0.479, + "words": [ + { + "text": "I", + "start": 17.66, + "end": 17.86, + "confidence": 0.252 + }, + { + "text": "really", + "start": 17.86, + "end": 18.22, + "confidence": 0.447 + }, + { + "text": "missed", + "start": 18.22, + "end": 18.7, + "confidence": 0.653 + }, + { + "text": "you.", + "start": 18.7, + "end": 18.96, + "confidence": 
0.717 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 19.58, + "end": 20.6, + "text": " I agree.", + "tokens": [ + 51314, + 286, + 3986, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.3614922408219222, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.24429722130298615, + "confidence": 0.422, + "words": [ + { + "text": "I", + "start": 19.58, + "end": 20.4, + "confidence": 0.404 + }, + { + "text": "agree.", + "start": 20.4, + "end": 20.6, + "confidence": 0.44 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/large_apollo11.mp3.words.json b/tests/expected/corner_cases/large_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..32f3b7bda79f301c9a8a93036908ce7e32a7a2d9 --- /dev/null +++ b/tests/expected/corner_cases/large_apollo11.mp3.words.json @@ -0,0 +1,1361 @@ +{ + "text": " Apollo 11, Houston. We got a recommendation for you on your DOJ's E-A limb, E-G-E-A's, over. Go ahead. Okay, we'd like to have, say, a selected one or two on the helmet. We're going to have B-1. And you can put the other one on Mike's helmet. We're still seeing the bleeper, over. We got a little bit on the helmet and B1. The other one might go under sleep restraint. We got them in their helmet bags and I guess, excuse me, the helmet bags. The leaflet bags. Roger. Roger, I'm taking the next A out of the CLS. Okay, we weren't sure that this was a suggestion. We thought we'd, you could check it out. So I guess whatever you come up with just let us know. Okay, no problem. Okay, no problem, we'll let you know where the end of the line is.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.36, + "end": 6.96, + "text": " Apollo 11, Houston. 
We got a recommendation for you on your DOJ's E-A limb, E-G-E-A's, over.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 13, + 492, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 10699, + 41, + 311, + 462, + 12, + 32, + 30390, + 11, + 462, + 12, + 38, + 12, + 36, + 12, + 32, + 311, + 11, + 670, + 13, + 50714 + ], + "temperature": 0.4, + "avg_logprob": -0.6358732926218134, + "compression_ratio": 1.443298969072165, + "no_speech_prob": 0.334128201007843, + "confidence": 0.595, + "words": [ + { + "text": "Apollo", + "start": 0.36, + "end": 0.92, + "confidence": 0.791 + }, + { + "text": "11,", + "start": 0.92, + "end": 1.32, + "confidence": 0.878 + }, + { + "text": "Houston.", + "start": 1.5, + "end": 1.74, + "confidence": 0.829 + }, + { + "text": "We", + "start": 1.9, + "end": 1.94, + "confidence": 0.836 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.645 + }, + { + "text": "a", + "start": 2.1, + "end": 2.3, + "confidence": 0.995 + }, + { + "text": "recommendation", + "start": 2.3, + "end": 3.08, + "confidence": 0.708 + }, + { + "text": "for", + "start": 3.08, + "end": 3.46, + "confidence": 0.856 + }, + { + "text": "you", + "start": 3.46, + "end": 3.62, + "confidence": 0.857 + }, + { + "text": "on", + "start": 3.62, + "end": 3.76, + "confidence": 0.87 + }, + { + "text": "your", + "start": 3.76, + "end": 4.06, + "confidence": 0.764 + }, + { + "text": "DOJ's", + "start": 4.06, + "end": 4.88, + "confidence": 0.162 + }, + { + "text": "E-A", + "start": 4.88, + "end": 5.3, + "confidence": 0.409 + }, + { + "text": "limb,", + "start": 5.3, + "end": 5.62, + "confidence": 0.263 + }, + { + "text": "E-G-E-A's,", + "start": 5.76, + "end": 6.8, + "confidence": 0.768 + }, + { + "text": "over.", + "start": 6.82, + "end": 6.96, + "confidence": 0.818 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 11.16, + "text": " Go ahead.", + "tokens": [ + 50914, + 1037, + 2286, + 13, + 50964 + ], + "temperature": 0.4, + "avg_logprob": 
-0.6358732926218134, + "compression_ratio": 1.443298969072165, + "no_speech_prob": 0.334128201007843, + "confidence": 0.781, + "words": [ + { + "text": "Go", + "start": 10.8, + "end": 10.98, + "confidence": 0.706 + }, + { + "text": "ahead.", + "start": 10.98, + "end": 11.16, + "confidence": 0.863 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 11.88, + "end": 19.12, + "text": " Okay, we'd like to have, say, a selected one or two on the helmet. We're going to have B-1.", + "tokens": [ + 50964, + 1033, + 11, + 321, + 1116, + 411, + 281, + 362, + 11, + 584, + 11, + 257, + 8209, + 472, + 420, + 732, + 322, + 264, + 15922, + 13, + 492, + 434, + 516, + 281, + 362, + 363, + 12, + 16, + 13, + 51314 + ], + "temperature": 0.4, + "avg_logprob": -0.6358732926218134, + "compression_ratio": 1.443298969072165, + "no_speech_prob": 0.334128201007843, + "confidence": 0.465, + "words": [ + { + "text": "Okay,", + "start": 11.88, + "end": 12.28, + "confidence": 0.467 + }, + { + "text": "we'd", + "start": 12.5, + "end": 13.16, + "confidence": 0.624 + }, + { + "text": "like", + "start": 13.16, + "end": 13.32, + "confidence": 0.876 + }, + { + "text": "to", + "start": 13.32, + "end": 13.56, + "confidence": 0.734 + }, + { + "text": "have,", + "start": 13.56, + "end": 14.24, + "confidence": 0.125 + }, + { + "text": "say,", + "start": 14.74, + "end": 15.0, + "confidence": 0.153 + }, + { + "text": "a", + "start": 15.1, + "end": 15.16, + "confidence": 0.061 + }, + { + "text": "selected", + "start": 15.16, + "end": 15.76, + "confidence": 0.143 + }, + { + "text": "one", + "start": 15.76, + "end": 16.08, + "confidence": 0.753 + }, + { + "text": "or", + "start": 16.08, + "end": 16.22, + "confidence": 0.662 + }, + { + "text": "two", + "start": 16.22, + "end": 16.36, + "confidence": 0.594 + }, + { + "text": "on", + "start": 16.36, + "end": 16.58, + "confidence": 0.877 + }, + { + "text": "the", + "start": 16.58, + "end": 16.82, + "confidence": 0.807 + }, + { + "text": "helmet.", + "start": 16.82, 
+ "end": 17.32, + "confidence": 0.65 + }, + { + "text": "We're", + "start": 17.58, + "end": 17.78, + "confidence": 0.566 + }, + { + "text": "going", + "start": 17.78, + "end": 17.94, + "confidence": 0.493 + }, + { + "text": "to", + "start": 17.94, + "end": 18.06, + "confidence": 0.896 + }, + { + "text": "have", + "start": 18.06, + "end": 18.22, + "confidence": 0.841 + }, + { + "text": "B-1.", + "start": 18.22, + "end": 19.12, + "confidence": 0.413 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 20.08, + "end": 24.9, + "text": " And you can put the other one on Mike's helmet. We're still seeing the bleeper, over.", + "tokens": [ + 51364, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 6602, + 311, + 15922, + 13, + 492, + 434, + 920, + 2577, + 264, + 5408, + 595, + 260, + 11, + 670, + 13, + 51614 + ], + "temperature": 0.4, + "avg_logprob": -0.6358732926218134, + "compression_ratio": 1.443298969072165, + "no_speech_prob": 0.334128201007843, + "confidence": 0.559, + "words": [ + { + "text": "And", + "start": 20.08, + "end": 20.24, + "confidence": 0.834 + }, + { + "text": "you", + "start": 20.24, + "end": 20.34, + "confidence": 0.882 + }, + { + "text": "can", + "start": 20.34, + "end": 20.48, + "confidence": 0.476 + }, + { + "text": "put", + "start": 20.48, + "end": 20.66, + "confidence": 0.891 + }, + { + "text": "the", + "start": 20.66, + "end": 20.84, + "confidence": 0.816 + }, + { + "text": "other", + "start": 20.84, + "end": 21.02, + "confidence": 0.777 + }, + { + "text": "one", + "start": 21.02, + "end": 21.22, + "confidence": 0.778 + }, + { + "text": "on", + "start": 21.22, + "end": 21.46, + "confidence": 0.905 + }, + { + "text": "Mike's", + "start": 21.46, + "end": 22.72, + "confidence": 0.852 + }, + { + "text": "helmet.", + "start": 22.72, + "end": 22.86, + "confidence": 0.793 + }, + { + "text": "We're", + "start": 23.08, + "end": 23.14, + "confidence": 0.803 + }, + { + "text": "still", + "start": 23.14, + "end": 23.34, + "confidence": 0.819 + }, + 
{ + "text": "seeing", + "start": 23.34, + "end": 23.64, + "confidence": 0.682 + }, + { + "text": "the", + "start": 23.64, + "end": 23.88, + "confidence": 0.103 + }, + { + "text": "bleeper,", + "start": 23.88, + "end": 24.28, + "confidence": 0.257 + }, + { + "text": "over.", + "start": 24.56, + "end": 24.9, + "confidence": 0.179 + } + ] + }, + { + "id": 4, + "seek": 3000, + "start": 31.18, + "end": 35.04, + "text": " We got a little bit on the helmet and B1.", + "tokens": [ + 50364, + 220, + 4360, + 658, + 257, + 707, + 857, + 322, + 264, + 15922, + 293, + 363, + 16, + 13, + 50714 + ], + "temperature": 0.4, + "avg_logprob": -0.8986218935483462, + "compression_ratio": 1.4807692307692308, + "no_speech_prob": 0.7222229242324829, + "confidence": 0.242, + "words": [ + { + "text": "We", + "start": 31.18, + "end": 31.62, + "confidence": 0.075 + }, + { + "text": "got", + "start": 31.62, + "end": 31.84, + "confidence": 0.153 + }, + { + "text": "a", + "start": 31.84, + "end": 33.32, + "confidence": 0.226 + }, + { + "text": "little", + "start": 33.32, + "end": 33.54, + "confidence": 0.177 + }, + { + "text": "bit", + "start": 33.54, + "end": 33.74, + "confidence": 0.645 + }, + { + "text": "on", + "start": 33.74, + "end": 33.94, + "confidence": 0.438 + }, + { + "text": "the", + "start": 33.94, + "end": 34.02, + "confidence": 0.108 + }, + { + "text": "helmet", + "start": 34.02, + "end": 34.22, + "confidence": 0.661 + }, + { + "text": "and", + "start": 34.22, + "end": 34.46, + "confidence": 0.483 + }, + { + "text": "B1.", + "start": 34.46, + "end": 35.04, + "confidence": 0.346 + } + ] + }, + { + "id": 5, + "seek": 3000, + "start": 37.56, + "end": 39.26, + "text": " The other one might go under sleep restraint.", + "tokens": [ + 50714, + 440, + 661, + 472, + 1062, + 352, + 833, + 2817, + 49281, + 13, + 50864 + ], + "temperature": 0.4, + "avg_logprob": -0.8986218935483462, + "compression_ratio": 1.4807692307692308, + "no_speech_prob": 0.7222229242324829, + "confidence": 0.591, + 
"words": [ + { + "text": "The", + "start": 37.56, + "end": 37.86, + "confidence": 0.529 + }, + { + "text": "other", + "start": 37.86, + "end": 37.9, + "confidence": 0.754 + }, + { + "text": "one", + "start": 37.9, + "end": 37.92, + "confidence": 0.693 + }, + { + "text": "might", + "start": 37.92, + "end": 37.94, + "confidence": 0.719 + }, + { + "text": "go", + "start": 37.94, + "end": 38.08, + "confidence": 0.717 + }, + { + "text": "under", + "start": 38.08, + "end": 38.34, + "confidence": 0.529 + }, + { + "text": "sleep", + "start": 38.34, + "end": 38.76, + "confidence": 0.416 + }, + { + "text": "restraint.", + "start": 38.76, + "end": 39.26, + "confidence": 0.476 + } + ] + }, + { + "id": 6, + "seek": 3000, + "start": 39.78, + "end": 47.42, + "text": " We got them in their helmet bags and I guess, excuse me, the helmet bags.", + "tokens": [ + 50864, + 492, + 658, + 552, + 294, + 641, + 15922, + 10405, + 293, + 286, + 2041, + 11, + 8960, + 385, + 11, + 264, + 15922, + 10405, + 13, + 51264 + ], + "temperature": 0.4, + "avg_logprob": -0.8986218935483462, + "compression_ratio": 1.4807692307692308, + "no_speech_prob": 0.7222229242324829, + "confidence": 0.454, + "words": [ + { + "text": "We", + "start": 39.78, + "end": 40.26, + "confidence": 0.752 + }, + { + "text": "got", + "start": 40.26, + "end": 40.5, + "confidence": 0.406 + }, + { + "text": "them", + "start": 40.5, + "end": 40.64, + "confidence": 0.503 + }, + { + "text": "in", + "start": 40.64, + "end": 40.78, + "confidence": 0.877 + }, + { + "text": "their", + "start": 40.78, + "end": 41.06, + "confidence": 0.695 + }, + { + "text": "helmet", + "start": 41.06, + "end": 41.58, + "confidence": 0.699 + }, + { + "text": "bags", + "start": 41.58, + "end": 42.08, + "confidence": 0.43 + }, + { + "text": "and", + "start": 42.08, + "end": 43.48, + "confidence": 0.264 + }, + { + "text": "I", + "start": 43.48, + "end": 44.02, + "confidence": 0.371 + }, + { + "text": "guess,", + "start": 44.02, + "end": 44.24, + "confidence": 
0.447 + }, + { + "text": "excuse", + "start": 44.44, + "end": 45.06, + "confidence": 0.36 + }, + { + "text": "me,", + "start": 45.06, + "end": 45.3, + "confidence": 0.84 + }, + { + "text": "the", + "start": 46.14, + "end": 46.16, + "confidence": 0.122 + }, + { + "text": "helmet", + "start": 46.16, + "end": 46.5, + "confidence": 0.364 + }, + { + "text": "bags.", + "start": 46.5, + "end": 47.42, + "confidence": 0.435 + } + ] + }, + { + "id": 7, + "seek": 3000, + "start": 48.18, + "end": 49.56, + "text": " The leaflet bags.", + "tokens": [ + 51264, + 440, + 476, + 64, + 69, + 2631, + 10405, + 13, + 51364 + ], + "temperature": 0.4, + "avg_logprob": -0.8986218935483462, + "compression_ratio": 1.4807692307692308, + "no_speech_prob": 0.7222229242324829, + "confidence": 0.316, + "words": [ + { + "text": "The", + "start": 48.18, + "end": 48.46, + "confidence": 0.618 + }, + { + "text": "leaflet", + "start": 48.46, + "end": 48.82, + "confidence": 0.278 + }, + { + "text": "bags.", + "start": 48.82, + "end": 49.56, + "confidence": 0.269 + } + ] + }, + { + "id": 8, + "seek": 3000, + "start": 50.06, + "end": 50.5, + "text": " Roger.", + "tokens": [ + 51364, + 17666, + 13, + 51414 + ], + "temperature": 0.4, + "avg_logprob": -0.8986218935483462, + "compression_ratio": 1.4807692307692308, + "no_speech_prob": 0.7222229242324829, + "confidence": 0.538, + "words": [ + { + "text": "Roger.", + "start": 50.06, + "end": 50.5, + "confidence": 0.538 + } + ] + }, + { + "id": 9, + "seek": 3000, + "start": 51.38, + "end": 55.38, + "text": " Roger, I'm taking the next A out of the CLS.", + "tokens": [ + 51414, + 17666, + 11, + 286, + 478, + 1940, + 264, + 958, + 316, + 484, + 295, + 264, + 12855, + 50, + 13, + 51664 + ], + "temperature": 0.4, + "avg_logprob": -0.8986218935483462, + "compression_ratio": 1.4807692307692308, + "no_speech_prob": 0.7222229242324829, + "confidence": 0.298, + "words": [ + { + "text": "Roger,", + "start": 51.38, + "end": 51.86, + "confidence": 0.451 + }, + { + "text": 
"I'm", + "start": 51.98, + "end": 52.06, + "confidence": 0.765 + }, + { + "text": "taking", + "start": 52.06, + "end": 52.28, + "confidence": 0.611 + }, + { + "text": "the", + "start": 52.28, + "end": 53.84, + "confidence": 0.106 + }, + { + "text": "next", + "start": 53.84, + "end": 53.94, + "confidence": 0.128 + }, + { + "text": "A", + "start": 53.94, + "end": 54.1, + "confidence": 0.181 + }, + { + "text": "out", + "start": 54.1, + "end": 54.26, + "confidence": 0.178 + }, + { + "text": "of", + "start": 54.26, + "end": 54.38, + "confidence": 0.356 + }, + { + "text": "the", + "start": 54.38, + "end": 54.6, + "confidence": 0.169 + }, + { + "text": "CLS.", + "start": 54.6, + "end": 55.38, + "confidence": 0.342 + } + ] + }, + { + "id": 10, + "seek": 5600, + "start": 56.0, + "end": 61.92, + "text": " Okay, we weren't sure that this was a suggestion.", + "tokens": [ + 50364, + 1033, + 11, + 321, + 4999, + 380, + 988, + 300, + 341, + 390, + 257, + 16541, + 13, + 50714 + ], + "temperature": 0.4, + "avg_logprob": -0.7243160101083609, + "compression_ratio": 1.2635658914728682, + "no_speech_prob": 0.6283825635910034, + "confidence": 0.454, + "words": [ + { + "text": "Okay,", + "start": 56.0, + "end": 56.62, + "confidence": 0.097 + }, + { + "text": "we", + "start": 56.72, + "end": 56.76, + "confidence": 0.236 + }, + { + "text": "weren't", + "start": 56.76, + "end": 57.16, + "confidence": 0.727 + }, + { + "text": "sure", + "start": 57.16, + "end": 58.84, + "confidence": 0.633 + }, + { + "text": "that", + "start": 58.84, + "end": 60.26, + "confidence": 0.436 + }, + { + "text": "this", + "start": 60.26, + "end": 61.28, + "confidence": 0.557 + }, + { + "text": "was", + "start": 61.28, + "end": 61.56, + "confidence": 0.599 + }, + { + "text": "a", + "start": 61.56, + "end": 61.6, + "confidence": 0.71 + }, + { + "text": "suggestion.", + "start": 61.6, + "end": 61.92, + "confidence": 0.472 + } + ] + }, + { + "id": 11, + "seek": 5600, + "start": 65.2, + "end": 67.82, + "text": " We 
thought we'd, you could check it out.", + "tokens": [ + 50714, + 492, + 1194, + 321, + 1116, + 11, + 220, + 5616, + 727, + 1520, + 309, + 484, + 13, + 50964 + ], + "temperature": 0.4, + "avg_logprob": -0.7243160101083609, + "compression_ratio": 1.2635658914728682, + "no_speech_prob": 0.6283825635910034, + "confidence": 0.526, + "words": [ + { + "text": "We", + "start": 65.2, + "end": 65.38, + "confidence": 0.848 + }, + { + "text": "thought", + "start": 65.38, + "end": 65.56, + "confidence": 0.802 + }, + { + "text": "we'd,", + "start": 65.56, + "end": 65.86, + "confidence": 0.537 + }, + { + "text": "you", + "start": 66.32, + "end": 67.04, + "confidence": 0.154 + }, + { + "text": "could", + "start": 67.04, + "end": 67.22, + "confidence": 0.617 + }, + { + "text": "check", + "start": 67.22, + "end": 67.44, + "confidence": 0.774 + }, + { + "text": "it", + "start": 67.44, + "end": 67.58, + "confidence": 0.899 + }, + { + "text": "out.", + "start": 67.58, + "end": 67.82, + "confidence": 0.813 + } + ] + }, + { + "id": 12, + "seek": 5600, + "start": 69.4, + "end": 72.44, + "text": " So I guess whatever you come up with just let us know.", + "tokens": [ + 50964, + 407, + 286, + 2041, + 2035, + 291, + 808, + 493, + 365, + 445, + 718, + 505, + 458, + 13, + 51214 + ], + "temperature": 0.4, + "avg_logprob": -0.7243160101083609, + "compression_ratio": 1.2635658914728682, + "no_speech_prob": 0.6283825635910034, + "confidence": 0.609, + "words": [ + { + "text": "So", + "start": 69.4, + "end": 69.46, + "confidence": 0.173 + }, + { + "text": "I", + "start": 69.46, + "end": 69.84, + "confidence": 0.48 + }, + { + "text": "guess", + "start": 69.84, + "end": 70.68, + "confidence": 0.887 + }, + { + "text": "whatever", + "start": 70.68, + "end": 71.12, + "confidence": 0.433 + }, + { + "text": "you", + "start": 71.12, + "end": 71.26, + "confidence": 0.882 + }, + { + "text": "come", + "start": 71.26, + "end": 71.42, + "confidence": 0.646 + }, + { + "text": "up", + "start": 71.42, + "end": 
71.62, + "confidence": 0.893 + }, + { + "text": "with", + "start": 71.62, + "end": 71.8, + "confidence": 0.808 + }, + { + "text": "just", + "start": 71.8, + "end": 71.98, + "confidence": 0.309 + }, + { + "text": "let", + "start": 71.98, + "end": 72.12, + "confidence": 0.901 + }, + { + "text": "us", + "start": 72.12, + "end": 72.26, + "confidence": 0.899 + }, + { + "text": "know.", + "start": 72.26, + "end": 72.44, + "confidence": 0.789 + } + ] + }, + { + "id": 13, + "seek": 5600, + "start": 74.12, + "end": 75.11, + "text": " Okay, no problem.", + "tokens": [ + 51214, + 1033, + 11, + 572, + 1154, + 13, + 51314 + ], + "temperature": 0.4, + "avg_logprob": -0.7243160101083609, + "compression_ratio": 1.2635658914728682, + "no_speech_prob": 0.6283825635910034, + "confidence": 0.678, + "words": [ + { + "text": "Okay,", + "start": 74.12, + "end": 74.5, + "confidence": 0.676 + }, + { + "text": "no", + "start": 74.6, + "end": 74.8, + "confidence": 0.591 + }, + { + "text": "problem.", + "start": 74.8, + "end": 75.11, + "confidence": 0.78 + } + ] + }, + { + "id": 14, + "seek": 7500, + "start": 75.11, + "end": 78.48, + "text": " Okay, no problem, we'll let you know where the end of the line is.", + "tokens": [ + 50364, + 1033, + 11, + 572, + 1154, + 11, + 321, + 603, + 718, + 291, + 458, + 689, + 264, + 917, + 295, + 264, + 1622, + 307, + 13, + 50564 + ], + "temperature": 0.4, + "avg_logprob": -0.5957319622948056, + "compression_ratio": 0.9565217391304348, + "no_speech_prob": 0.16674424707889557, + "confidence": 0.57, + "words": [ + { + "text": "Okay,", + "start": 75.11, + "end": 75.84, + "confidence": 0.243 + }, + { + "text": "no", + "start": 76.0, + "end": 76.2, + "confidence": 0.336 + }, + { + "text": "problem,", + "start": 76.2, + "end": 76.54, + "confidence": 0.687 + }, + { + "text": "we'll", + "start": 76.64, + "end": 76.82, + "confidence": 0.753 + }, + { + "text": "let", + "start": 76.82, + "end": 76.98, + "confidence": 0.87 + }, + { + "text": "you", + "start": 76.98, + 
"end": 77.06, + "confidence": 0.667 + }, + { + "text": "know", + "start": 77.06, + "end": 77.2, + "confidence": 0.828 + }, + { + "text": "where", + "start": 77.2, + "end": 77.32, + "confidence": 0.261 + }, + { + "text": "the", + "start": 77.32, + "end": 77.42, + "confidence": 0.614 + }, + { + "text": "end", + "start": 77.42, + "end": 77.54, + "confidence": 0.593 + }, + { + "text": "of", + "start": 77.54, + "end": 77.82, + "confidence": 0.814 + }, + { + "text": "the", + "start": 77.82, + "end": 78.08, + "confidence": 0.26 + }, + { + "text": "line", + "start": 78.08, + "end": 78.3, + "confidence": 0.838 + }, + { + "text": "is.", + "start": 78.3, + "end": 78.48, + "confidence": 0.852 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/medium.accurate_gloria.mp3.words.json b/tests/expected/corner_cases/medium.accurate_gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..9dc08b63a832a3e8ecfa68f23cc901da5a6cba61 --- /dev/null +++ b/tests/expected/corner_cases/medium.accurate_gloria.mp3.words.json @@ -0,0 +1,544 @@ +{ + "text": " Hello. How are you? How are you? Oh, I'm okay. I will be. I said she could stay with us tomorrow until she feels better. Of course she can. This won't be for long. Well, you can stay as long as you want, my love. 
I really miss you.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.32, + "end": 1.82, + "text": " Hello.", + "tokens": [ + 50364, + 2425, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.189, + "words": [ + { + "text": "Hello.", + "start": 1.32, + "end": 1.82, + "confidence": 0.189 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.94, + "end": 3.54, + "text": " How are you?", + "tokens": [ + 50464, + 1012, + 366, + 291, + 30, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.157, + "words": [ + { + "text": "How", + "start": 1.94, + "end": 2.6, + "confidence": 0.004 + }, + { + "text": "are", + "start": 2.6, + "end": 3.1, + "confidence": 0.238 + }, + { + "text": "you?", + "start": 3.1, + "end": 3.54, + "confidence": 0.809 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 3.7, + "end": 6.38, + "text": " How are you?", + "tokens": [ + 50564, + 1012, + 366, + 291, + 30, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.353, + "words": [ + { + "text": "How", + "start": 3.7, + "end": 4.3, + "confidence": 0.021 + }, + { + "text": "are", + "start": 4.3, + "end": 6.12, + "confidence": 0.93 + }, + { + "text": "you?", + "start": 6.12, + "end": 6.38, + "confidence": 0.897 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 6.38, + "end": 7.74, + "text": " Oh, I'm okay.", + "tokens": [ + 50664, + 876, + 11, + 286, + 478, + 1392, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.63, + "words": [ + { + "text": "Oh,", + "start": 6.38, + "end": 6.66, + 
"confidence": 0.37 + }, + { + "text": "I'm", + "start": 6.76, + "end": 7.18, + "confidence": 0.979 + }, + { + "text": "okay.", + "start": 7.18, + "end": 7.74, + "confidence": 0.692 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 7.74, + "end": 9.2, + "text": " I will be.", + "tokens": [ + 50764, + 286, + 486, + 312, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.543, + "words": [ + { + "text": "I", + "start": 7.74, + "end": 8.64, + "confidence": 0.468 + }, + { + "text": "will", + "start": 8.64, + "end": 8.88, + "confidence": 0.582 + }, + { + "text": "be.", + "start": 8.88, + "end": 9.2, + "confidence": 0.565 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.5, + "end": 11.56, + "text": " I said she could stay with us tomorrow until she feels better.", + "tokens": [ + 50864, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 1826, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.494, + "words": [ + { + "text": "I", + "start": 9.5, + "end": 9.52, + "confidence": 0.283 + }, + { + "text": "said", + "start": 9.52, + "end": 9.64, + "confidence": 0.561 + }, + { + "text": "she", + "start": 9.64, + "end": 9.78, + "confidence": 0.919 + }, + { + "text": "could", + "start": 9.78, + "end": 9.98, + "confidence": 0.898 + }, + { + "text": "stay", + "start": 9.98, + "end": 10.16, + "confidence": 0.993 + }, + { + "text": "with", + "start": 10.16, + "end": 10.28, + "confidence": 0.991 + }, + { + "text": "us", + "start": 10.28, + "end": 10.46, + "confidence": 0.966 + }, + { + "text": "tomorrow", + "start": 10.46, + "end": 10.68, + "confidence": 0.004 + }, + { + "text": "until", + "start": 10.68, + "end": 10.84, + "confidence": 0.312 + }, + { + "text": "she", + "start": 10.84, + "end": 
10.98, + "confidence": 0.997 + }, + { + "text": "feels", + "start": 10.98, + "end": 11.2, + "confidence": 0.988 + }, + { + "text": "better.", + "start": 11.2, + "end": 11.56, + "confidence": 0.825 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 11.56, + "end": 13.42, + "text": " Of course she can.", + "tokens": [ + 50964, + 2720, + 1164, + 750, + 393, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.31, + "words": [ + { + "text": "Of", + "start": 11.56, + "end": 12.54, + "confidence": 0.09 + }, + { + "text": "course", + "start": 12.54, + "end": 12.72, + "confidence": 0.985 + }, + { + "text": "she", + "start": 12.72, + "end": 13.04, + "confidence": 0.209 + }, + { + "text": "can.", + "start": 13.04, + "end": 13.42, + "confidence": 0.392 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.54, + "end": 15.24, + "text": " This won't be for long.", + "tokens": [ + 51064, + 639, + 1582, + 380, + 312, + 337, + 938, + 13, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.444, + "words": [ + { + "text": "This", + "start": 13.54, + "end": 14.16, + "confidence": 0.007 + }, + { + "text": "won't", + "start": 14.16, + "end": 14.58, + "confidence": 0.926 + }, + { + "text": "be", + "start": 14.58, + "end": 14.8, + "confidence": 0.99 + }, + { + "text": "for", + "start": 14.8, + "end": 14.98, + "confidence": 0.978 + }, + { + "text": "long.", + "start": 14.98, + "end": 15.24, + "confidence": 0.765 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.5, + "end": 17.62, + "text": " Well, you can stay as long as you want, my love.", + "tokens": [ + 51164, + 1042, + 11, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + 
"compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.587, + "words": [ + { + "text": "Well,", + "start": 15.5, + "end": 15.52, + "confidence": 0.073 + }, + { + "text": "you", + "start": 15.52, + "end": 15.66, + "confidence": 0.491 + }, + { + "text": "can", + "start": 15.66, + "end": 15.88, + "confidence": 0.876 + }, + { + "text": "stay", + "start": 15.88, + "end": 16.12, + "confidence": 0.932 + }, + { + "text": "as", + "start": 16.12, + "end": 16.28, + "confidence": 0.979 + }, + { + "text": "long", + "start": 16.28, + "end": 16.4, + "confidence": 0.995 + }, + { + "text": "as", + "start": 16.4, + "end": 16.54, + "confidence": 0.995 + }, + { + "text": "you", + "start": 16.54, + "end": 16.62, + "confidence": 0.995 + }, + { + "text": "want,", + "start": 16.62, + "end": 16.8, + "confidence": 0.643 + }, + { + "text": "my", + "start": 16.98, + "end": 17.2, + "confidence": 0.958 + }, + { + "text": "love.", + "start": 17.2, + "end": 17.62, + "confidence": 0.835 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 17.62, + "end": 19.52, + "text": " I really miss you.", + "tokens": [ + 51264, + 286, + 534, + 1713, + 291, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.21419989641974954, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.0501624159514904, + "confidence": 0.477, + "words": [ + { + "text": "I", + "start": 17.62, + "end": 17.94, + "confidence": 0.495 + }, + { + "text": "really", + "start": 17.94, + "end": 18.3, + "confidence": 0.89 + }, + { + "text": "miss", + "start": 18.3, + "end": 18.78, + "confidence": 0.338 + }, + { + "text": "you.", + "start": 18.78, + "end": 19.52, + "confidence": 0.408 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/medium.efficient_gloria.mp3.words.json b/tests/expected/corner_cases/medium.efficient_gloria.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..ed7ba56f8293122ca0c772faa160d91738a24a74 --- /dev/null +++ b/tests/expected/corner_cases/medium.efficient_gloria.mp3.words.json @@ -0,0 +1,538 @@ +{ + "text": " Hello. How are you? Love. How are you? I'm okay. I will be. I said she could stay with us tomorrow, she feels better. Of course she can. This won't be for long. Well, you can stay as long as you want, my love. I really miss you.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.4, + "end": 1.74, + "text": " Hello.", + "tokens": [ + 50364, + 2425, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.285, + "words": [ + { + "text": "Hello.", + "start": 1.4, + "end": 1.74, + "confidence": 0.285 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 2.32, + "end": 3.5, + "text": " How are you?", + "tokens": [ + 50464, + 1012, + 366, + 291, + 30, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.784, + "words": [ + { + "text": "How", + "start": 2.32, + "end": 2.82, + "confidence": 0.548 + }, + { + "text": "are", + "start": 2.82, + "end": 3.24, + "confidence": 0.989 + }, + { + "text": "you?", + "start": 3.24, + "end": 3.5, + "confidence": 0.89 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 3.5, + "end": 4.26, + "text": " Love.", + "tokens": [ + 50564, + 5956, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.243, + "words": [ + { + "text": "Love.", + "start": 3.5, + "end": 4.26, + "confidence": 0.243 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 5.5, + "end": 6.38, + "text": " How are you?", + "tokens": [ + 50664, + 1012, + 366, + 291, + 30, + 50764 + ], + "temperature": 0.0, 
+ "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.927, + "words": [ + { + "text": "How", + "start": 5.5, + "end": 5.68, + "confidence": 0.851 + }, + { + "text": "are", + "start": 5.68, + "end": 6.08, + "confidence": 0.998 + }, + { + "text": "you?", + "start": 6.08, + "end": 6.38, + "confidence": 0.937 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 7.5, + "end": 9.18, + "text": " I'm okay. I will be.", + "tokens": [ + 50764, + 286, + 478, + 1392, + 13, + 286, + 486, + 312, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.769, + "words": [ + { + "text": "I'm", + "start": 7.5, + "end": 7.52, + "confidence": 0.766 + }, + { + "text": "okay.", + "start": 7.52, + "end": 8.22, + "confidence": 0.488 + }, + { + "text": "I", + "start": 8.42, + "end": 8.64, + "confidence": 0.765 + }, + { + "text": "will", + "start": 8.64, + "end": 8.94, + "confidence": 0.949 + }, + { + "text": "be.", + "start": 8.94, + "end": 9.18, + "confidence": 0.997 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.5, + "end": 11.52, + "text": " I said she could stay with us tomorrow, she feels better.", + "tokens": [ + 50864, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 11, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.786, + "words": [ + { + "text": "I", + "start": 9.5, + "end": 9.52, + "confidence": 0.884 + }, + { + "text": "said", + "start": 9.52, + "end": 9.64, + "confidence": 0.858 + }, + { + "text": "she", + "start": 9.64, + "end": 9.78, + "confidence": 0.928 + }, + { + "text": "could", + "start": 9.78, + "end": 9.96, + "confidence": 0.91 + }, + { + "text": "stay", + "start": 9.96, + "end": 10.16, + 
"confidence": 0.994 + }, + { + "text": "with", + "start": 10.16, + "end": 10.28, + "confidence": 0.977 + }, + { + "text": "us", + "start": 10.28, + "end": 10.46, + "confidence": 0.993 + }, + { + "text": "tomorrow,", + "start": 10.46, + "end": 10.66, + "confidence": 0.512 + }, + { + "text": "she", + "start": 10.72, + "end": 10.86, + "confidence": 0.278 + }, + { + "text": "feels", + "start": 10.86, + "end": 11.16, + "confidence": 0.81 + }, + { + "text": "better.", + "start": 11.16, + "end": 11.52, + "confidence": 0.992 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 12.06, + "end": 13.36, + "text": " Of course she can.", + "tokens": [ + 50964, + 2720, + 1164, + 750, + 393, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.928, + "words": [ + { + "text": "Of", + "start": 12.06, + "end": 12.34, + "confidence": 0.788 + }, + { + "text": "course", + "start": 12.34, + "end": 12.56, + "confidence": 0.989 + }, + { + "text": "she", + "start": 12.56, + "end": 12.88, + "confidence": 0.959 + }, + { + "text": "can.", + "start": 12.88, + "end": 13.36, + "confidence": 0.993 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.5, + "end": 15.26, + "text": " This won't be for long.", + "tokens": [ + 51064, + 639, + 1582, + 380, + 312, + 337, + 938, + 13, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.892, + "words": [ + { + "text": "This", + "start": 13.5, + "end": 14.28, + "confidence": 0.545 + }, + { + "text": "won't", + "start": 14.28, + "end": 14.6, + "confidence": 0.975 + }, + { + "text": "be", + "start": 14.6, + "end": 14.8, + "confidence": 0.996 + }, + { + "text": "for", + "start": 14.8, + "end": 14.96, + "confidence": 0.983 + }, + { + "text": "long.", + "start": 14.96, + "end": 15.26, + "confidence": 0.996 + } + ] 
+ }, + { + "id": 8, + "seek": 0, + "start": 15.5, + "end": 17.62, + "text": " Well, you can stay as long as you want, my love.", + "tokens": [ + 51164, + 1042, + 11, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.927, + "words": [ + { + "text": "Well,", + "start": 15.5, + "end": 15.56, + "confidence": 0.674 + }, + { + "text": "you", + "start": 15.68, + "end": 15.7, + "confidence": 0.744 + }, + { + "text": "can", + "start": 15.7, + "end": 15.94, + "confidence": 0.982 + }, + { + "text": "stay", + "start": 15.94, + "end": 16.14, + "confidence": 0.985 + }, + { + "text": "as", + "start": 16.14, + "end": 16.28, + "confidence": 0.972 + }, + { + "text": "long", + "start": 16.28, + "end": 16.4, + "confidence": 0.994 + }, + { + "text": "as", + "start": 16.4, + "end": 16.48, + "confidence": 0.996 + }, + { + "text": "you", + "start": 16.48, + "end": 16.62, + "confidence": 0.995 + }, + { + "text": "want,", + "start": 16.62, + "end": 16.8, + "confidence": 0.995 + }, + { + "text": "my", + "start": 16.88, + "end": 17.22, + "confidence": 0.949 + }, + { + "text": "love.", + "start": 17.22, + "end": 17.62, + "confidence": 0.993 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 17.76, + "end": 19.26, + "text": " I really miss you.", + "tokens": [ + 51264, + 286, + 534, + 1713, + 291, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.792, + "words": [ + { + "text": "I", + "start": 17.76, + "end": 17.98, + "confidence": 0.587 + }, + { + "text": "really", + "start": 17.98, + "end": 18.36, + "confidence": 0.943 + }, + { + "text": "miss", + "start": 18.36, + "end": 18.78, + "confidence": 0.903 + }, + { + "text": "you.", + "start": 18.78, + "end": 
19.26, + "confidence": 0.787 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/nocond.random_music.mp4.words.json b/tests/expected/corner_cases/nocond.random_music.mp4.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f06d275035a63608dd63d45f0e1f4a80d5424924 --- /dev/null +++ b/tests/expected/corner_cases/nocond.random_music.mp4.words.json @@ -0,0 +1,2847 @@ +{ + "text": " I As you're praying for my phone And the laughter in the holes And the names that I've been called I stack it in my mind And I'm waiting for the time When I show you what it's like To be worse than in a mine Tell you, you're the greatest But once you turn they hate us Oh, the misery Everybody wants to be my enemy I smell the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself Look, okay, I'm Hoping that somebody pray for me I'm praying that somebody vote for me I'm staying where nobody's supposed to be I propose to be in a wreck of emotions Ready to go whenever she let me know The road is long so put the pedal into the flow The enemy on my trail, my energy unavailable I'ma tell the master the way go Hey, when I'm flat on my track to the top I been out of shape, thinking that I'm a box of an astronaut Blasted off the planet, rocked the cars, catastrophic And it matters more because I had it, now I had it I thought about wreaking havoc on an opposition Kinda shockin', they want it static, with precision I'm automatic, quarterback, I ain't talkin' second Pack it, pack it up, I don't panic, better batter up Who the baddest, it don't matter cause we is your Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Oh, the misery Everybody wants to be my enemy Spell the sympathy Everybody wants to be my enemy Everywhere I swear I'll never be a saint Nowhere my enemy Everywhere I swear I'll never be a saint Look out for yourself", + "segments": [ + { + 
"id": 0, + "seek": 0, + "start": 2.24, + "end": 2.36, + "text": " I", + "tokens": [ + 50364, + 286, + 50464 + ], + "temperature": 0.4, + "avg_logprob": -0.9355003237724304, + "compression_ratio": 0.1111111111111111, + "no_speech_prob": 0.7811808586120605, + "confidence": 0.032, + "words": [ + { + "text": "I", + "start": 2.24, + "end": 2.36, + "confidence": 0.032 + } + ] + }, + { + "id": 1, + "seek": 9000, + "start": 90.18, + "end": 91.7, + "text": " As you're praying for my phone", + "tokens": [ + 50364, + 1018, + 291, + 434, + 15611, + 337, + 452, + 2593, + 50446 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.716, + "words": [ + { + "text": "As", + "start": 90.18, + "end": 90.32, + "confidence": 0.357 + }, + { + "text": "you're", + "start": 90.32, + "end": 90.46, + "confidence": 0.893 + }, + { + "text": "praying", + "start": 90.46, + "end": 90.88, + "confidence": 0.966 + }, + { + "text": "for", + "start": 90.88, + "end": 91.12, + "confidence": 0.994 + }, + { + "text": "my", + "start": 91.12, + "end": 91.3, + "confidence": 0.997 + }, + { + "text": "phone", + "start": 91.3, + "end": 91.7, + "confidence": 0.353 + } + ] + }, + { + "id": 2, + "seek": 9000, + "start": 91.78, + "end": 93.22, + "text": " And the laughter in the holes", + "tokens": [ + 50446, + 400, + 264, + 13092, + 294, + 264, + 8118, + 50524 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.864, + "words": [ + { + "text": "And", + "start": 91.78, + "end": 91.92, + "confidence": 0.885 + }, + { + "text": "the", + "start": 91.92, + "end": 92.1, + "confidence": 0.99 + }, + { + "text": "laughter", + "start": 92.1, + "end": 92.5, + "confidence": 0.993 + }, + { + "text": "in", + "start": 92.5, + "end": 92.78, + "confidence": 0.895 + }, + { + "text": "the", + "start": 92.78, + 
"end": 92.92, + "confidence": 0.995 + }, + { + "text": "holes", + "start": 92.92, + "end": 93.22, + "confidence": 0.538 + } + ] + }, + { + "id": 3, + "seek": 9000, + "start": 93.34, + "end": 94.76, + "text": " And the names that I've been called", + "tokens": [ + 50524, + 400, + 264, + 5288, + 300, + 286, + 600, + 668, + 1219, + 50604 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.97, + "words": [ + { + "text": "And", + "start": 93.34, + "end": 93.48, + "confidence": 0.956 + }, + { + "text": "the", + "start": 93.48, + "end": 93.6, + "confidence": 0.994 + }, + { + "text": "names", + "start": 93.6, + "end": 93.86, + "confidence": 0.989 + }, + { + "text": "that", + "start": 93.86, + "end": 94.14, + "confidence": 0.982 + }, + { + "text": "I've", + "start": 94.14, + "end": 94.32, + "confidence": 0.946 + }, + { + "text": "been", + "start": 94.32, + "end": 94.5, + "confidence": 0.998 + }, + { + "text": "called", + "start": 94.5, + "end": 94.76, + "confidence": 0.955 + } + ] + }, + { + "id": 4, + "seek": 9000, + "start": 94.98, + "end": 96.38, + "text": " I stack it in my mind", + "tokens": [ + 50604, + 286, + 8630, + 309, + 294, + 452, + 1575, + 50681 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.96, + "words": [ + { + "text": "I", + "start": 94.98, + "end": 95.14, + "confidence": 0.991 + }, + { + "text": "stack", + "start": 95.14, + "end": 95.46, + "confidence": 0.802 + }, + { + "text": "it", + "start": 95.46, + "end": 95.72, + "confidence": 0.996 + }, + { + "text": "in", + "start": 95.72, + "end": 95.8, + "confidence": 0.996 + }, + { + "text": "my", + "start": 95.8, + "end": 96.1, + "confidence": 0.995 + }, + { + "text": "mind", + "start": 96.1, + "end": 96.38, + "confidence": 0.999 + } + ] + }, + { + "id": 5, + "seek": 9000, + 
"start": 96.48, + "end": 97.82, + "text": " And I'm waiting for the time", + "tokens": [ + 50681, + 400, + 286, + 478, + 3806, + 337, + 264, + 565, + 50756 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.989, + "words": [ + { + "text": "And", + "start": 96.48, + "end": 96.6, + "confidence": 0.971 + }, + { + "text": "I'm", + "start": 96.6, + "end": 96.74, + "confidence": 0.994 + }, + { + "text": "waiting", + "start": 96.74, + "end": 97.14, + "confidence": 0.978 + }, + { + "text": "for", + "start": 97.14, + "end": 97.34, + "confidence": 0.997 + }, + { + "text": "the", + "start": 97.34, + "end": 97.54, + "confidence": 0.996 + }, + { + "text": "time", + "start": 97.54, + "end": 97.82, + "confidence": 0.997 + } + ] + }, + { + "id": 6, + "seek": 9000, + "start": 98.0, + "end": 99.44, + "text": " When I show you what it's like", + "tokens": [ + 50756, + 1133, + 286, + 855, + 291, + 437, + 309, + 311, + 411, + 50834 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.98, + "words": [ + { + "text": "When", + "start": 98.0, + "end": 98.14, + "confidence": 0.968 + }, + { + "text": "I", + "start": 98.14, + "end": 98.32, + "confidence": 0.996 + }, + { + "text": "show", + "start": 98.32, + "end": 98.5, + "confidence": 0.907 + }, + { + "text": "you", + "start": 98.5, + "end": 98.76, + "confidence": 0.994 + }, + { + "text": "what", + "start": 98.76, + "end": 98.94, + "confidence": 0.994 + }, + { + "text": "it's", + "start": 98.94, + "end": 99.24, + "confidence": 0.991 + }, + { + "text": "like", + "start": 99.24, + "end": 99.44, + "confidence": 0.999 + } + ] + }, + { + "id": 7, + "seek": 9000, + "start": 99.52, + "end": 101.26, + "text": " To be worse than in a mine", + "tokens": [ + 50834, + 1407, + 312, + 5324, + 813, + 294, + 257, + 3892, + 50923 + 
], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.481, + "words": [ + { + "text": "To", + "start": 99.52, + "end": 99.72, + "confidence": 0.987 + }, + { + "text": "be", + "start": 99.72, + "end": 99.84, + "confidence": 0.999 + }, + { + "text": "worse", + "start": 99.84, + "end": 100.1, + "confidence": 0.472 + }, + { + "text": "than", + "start": 100.1, + "end": 100.38, + "confidence": 0.188 + }, + { + "text": "in", + "start": 100.38, + "end": 100.56, + "confidence": 0.284 + }, + { + "text": "a", + "start": 100.56, + "end": 100.9, + "confidence": 0.407 + }, + { + "text": "mine", + "start": 100.9, + "end": 101.26, + "confidence": 0.591 + } + ] + }, + { + "id": 8, + "seek": 9000, + "start": 101.36, + "end": 106.82, + "text": " Tell you, you're the greatest", + "tokens": [ + 50923, + 5115, + 291, + 11, + 291, + 434, + 264, + 6636, + 51215 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.962, + "words": [ + { + "text": "Tell", + "start": 101.36, + "end": 102.24, + "confidence": 0.898 + }, + { + "text": "you,", + "start": 102.24, + "end": 102.92, + "confidence": 0.993 + }, + { + "text": "you're", + "start": 102.92, + "end": 103.82, + "confidence": 0.971 + }, + { + "text": "the", + "start": 103.82, + "end": 104.34, + "confidence": 0.969 + }, + { + "text": "greatest", + "start": 104.34, + "end": 106.82, + "confidence": 0.973 + } + ] + }, + { + "id": 9, + "seek": 9000, + "start": 107.22, + "end": 113.02, + "text": " But once you turn they hate us", + "tokens": [ + 51215, + 583, + 1564, + 291, + 1261, + 436, + 4700, + 505, + 51520 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.862, + "words": [ + { + "text": "But", + "start": 
107.22, + "end": 107.62, + "confidence": 0.996 + }, + { + "text": "once", + "start": 107.62, + "end": 108.44, + "confidence": 0.826 + }, + { + "text": "you", + "start": 108.44, + "end": 109.2, + "confidence": 0.998 + }, + { + "text": "turn", + "start": 109.2, + "end": 109.9, + "confidence": 0.908 + }, + { + "text": "they", + "start": 109.9, + "end": 110.38, + "confidence": 0.481 + }, + { + "text": "hate", + "start": 110.38, + "end": 111.92, + "confidence": 0.988 + }, + { + "text": "us", + "start": 111.92, + "end": 113.02, + "confidence": 0.999 + } + ] + }, + { + "id": 10, + "seek": 9000, + "start": 114.64, + "end": 116.2, + "text": " Oh, the misery", + "tokens": [ + 51585, + 876, + 11, + 264, + 32309, + 51700 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.967, + "words": [ + { + "text": "Oh,", + "start": 114.64, + "end": 115.16, + "confidence": 0.912 + }, + { + "text": "the", + "start": 115.22, + "end": 115.44, + "confidence": 0.993 + }, + { + "text": "misery", + "start": 115.44, + "end": 116.2, + "confidence": 0.999 + } + ] + }, + { + "id": 11, + "seek": 9000, + "start": 116.84, + "end": 119.3, + "text": " Everybody wants to be my enemy", + "tokens": [ + 51700, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 51860 + ], + "temperature": 0.4, + "avg_logprob": -0.41540291383094397, + "compression_ratio": 1.5369458128078817, + "no_speech_prob": 0.7202673554420471, + "confidence": 0.944, + "words": [ + { + "text": "Everybody", + "start": 116.84, + "end": 117.6, + "confidence": 0.724 + }, + { + "text": "wants", + "start": 117.6, + "end": 117.96, + "confidence": 0.993 + }, + { + "text": "to", + "start": 117.96, + "end": 118.14, + "confidence": 0.998 + }, + { + "text": "be", + "start": 118.14, + "end": 118.26, + "confidence": 0.999 + }, + { + "text": "my", + "start": 118.26, + "end": 118.62, + "confidence": 0.997 + }, + { + "text": "enemy", + "start": 
118.62, + "end": 119.3, + "confidence": 0.988 + } + ] + }, + { + "id": 12, + "seek": 11992, + "start": 119.96, + "end": 126.26, + "text": " I smell the sympathy Everybody wants to be my enemy", + "tokens": [ + 50364, + 286, + 4316, + 264, + 33240, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 50686 + ], + "temperature": 0.4, + "avg_logprob": -0.38010619236872745, + "compression_ratio": 1.755458515283843, + "no_speech_prob": 0.7215486168861389, + "confidence": 0.725, + "words": [ + { + "text": "I", + "start": 119.96, + "end": 120.78, + "confidence": 0.324 + }, + { + "text": "smell", + "start": 120.78, + "end": 121.42, + "confidence": 0.796 + }, + { + "text": "the", + "start": 121.42, + "end": 121.72, + "confidence": 0.857 + }, + { + "text": "sympathy", + "start": 121.72, + "end": 122.54, + "confidence": 0.981 + }, + { + "text": "Everybody", + "start": 122.54, + "end": 123.8, + "confidence": 0.265 + }, + { + "text": "wants", + "start": 123.8, + "end": 124.18, + "confidence": 0.984 + }, + { + "text": "to", + "start": 124.18, + "end": 124.48, + "confidence": 0.994 + }, + { + "text": "be", + "start": 124.48, + "end": 124.86, + "confidence": 0.994 + }, + { + "text": "my", + "start": 124.86, + "end": 125.58, + "confidence": 0.747 + }, + { + "text": "enemy", + "start": 125.58, + "end": 126.26, + "confidence": 0.967 + } + ] + }, + { + "id": 13, + "seek": 11992, + "start": 126.6, + "end": 132.52, + "text": " Look out for yourself My enemy", + "tokens": [ + 50686, + 2053, + 484, + 337, + 1803, + 1222, + 5945, + 50994 + ], + "temperature": 0.4, + "avg_logprob": -0.38010619236872745, + "compression_ratio": 1.755458515283843, + "no_speech_prob": 0.7215486168861389, + "confidence": 0.712, + "words": [ + { + "text": "Look", + "start": 126.6, + "end": 130.08, + "confidence": 0.463 + }, + { + "text": "out", + "start": 130.08, + "end": 130.26, + "confidence": 0.74 + }, + { + "text": "for", + "start": 130.26, + "end": 130.48, + "confidence": 0.993 + }, + { + "text": "yourself", + 
"start": 130.48, + "end": 130.98, + "confidence": 0.983 + }, + { + "text": "My", + "start": 130.98, + "end": 131.78, + "confidence": 0.399 + }, + { + "text": "enemy", + "start": 131.78, + "end": 132.52, + "confidence": 0.978 + } + ] + }, + { + "id": 14, + "seek": 11992, + "start": 133.92, + "end": 137.26, + "text": " Look out for yourself", + "tokens": [ + 50994, + 2053, + 484, + 337, + 1803, + 51230 + ], + "temperature": 0.4, + "avg_logprob": -0.38010619236872745, + "compression_ratio": 1.755458515283843, + "no_speech_prob": 0.7215486168861389, + "confidence": 0.977, + "words": [ + { + "text": "Look", + "start": 133.92, + "end": 134.06, + "confidence": 0.923 + }, + { + "text": "out", + "start": 134.06, + "end": 136.56, + "confidence": 0.989 + }, + { + "text": "for", + "start": 136.56, + "end": 136.74, + "confidence": 0.999 + }, + { + "text": "yourself", + "start": 136.74, + "end": 137.26, + "confidence": 0.999 + } + ] + }, + { + "id": 15, + "seek": 11992, + "start": 137.62, + "end": 140.18, + "text": " Look, okay, I'm Hoping that somebody pray for me", + "tokens": [ + 51230, + 2053, + 11, + 1392, + 11, + 286, + 478, + 13438, + 278, + 300, + 2618, + 3690, + 337, + 385, + 51374 + ], + "temperature": 0.4, + "avg_logprob": -0.38010619236872745, + "compression_ratio": 1.755458515283843, + "no_speech_prob": 0.7215486168861389, + "confidence": 0.779, + "words": [ + { + "text": "Look,", + "start": 137.62, + "end": 137.88, + "confidence": 0.774 + }, + { + "text": "okay,", + "start": 138.44, + "end": 138.58, + "confidence": 0.665 + }, + { + "text": "I'm", + "start": 138.76, + "end": 138.94, + "confidence": 0.965 + }, + { + "text": "Hoping", + "start": 138.94, + "end": 139.22, + "confidence": 0.414 + }, + { + "text": "that", + "start": 139.22, + "end": 139.34, + "confidence": 0.967 + }, + { + "text": "somebody", + "start": 139.34, + "end": 139.58, + "confidence": 0.985 + }, + { + "text": "pray", + "start": 139.58, + "end": 139.86, + "confidence": 0.855 + }, + { + "text": 
"for", + "start": 139.86, + "end": 140.02, + "confidence": 0.959 + }, + { + "text": "me", + "start": 140.02, + "end": 140.18, + "confidence": 0.998 + } + ] + }, + { + "id": 16, + "seek": 11992, + "start": 140.36, + "end": 143.2, + "text": " I'm praying that somebody vote for me I'm staying where nobody's supposed to be", + "tokens": [ + 51374, + 286, + 478, + 15611, + 300, + 2618, + 4740, + 337, + 385, + 286, + 478, + 7939, + 689, + 5079, + 311, + 3442, + 281, + 312, + 51524 + ], + "temperature": 0.4, + "avg_logprob": -0.38010619236872745, + "compression_ratio": 1.755458515283843, + "no_speech_prob": 0.7215486168861389, + "confidence": 0.846, + "words": [ + { + "text": "I'm", + "start": 140.36, + "end": 140.48, + "confidence": 0.788 + }, + { + "text": "praying", + "start": 140.48, + "end": 140.64, + "confidence": 0.869 + }, + { + "text": "that", + "start": 140.64, + "end": 140.84, + "confidence": 0.97 + }, + { + "text": "somebody", + "start": 140.84, + "end": 141.1, + "confidence": 0.996 + }, + { + "text": "vote", + "start": 141.1, + "end": 141.34, + "confidence": 0.372 + }, + { + "text": "for", + "start": 141.34, + "end": 141.5, + "confidence": 0.999 + }, + { + "text": "me", + "start": 141.5, + "end": 141.7, + "confidence": 0.999 + }, + { + "text": "I'm", + "start": 141.7, + "end": 141.96, + "confidence": 0.902 + }, + { + "text": "staying", + "start": 141.96, + "end": 142.16, + "confidence": 0.696 + }, + { + "text": "where", + "start": 142.16, + "end": 142.34, + "confidence": 0.964 + }, + { + "text": "nobody's", + "start": 142.34, + "end": 142.68, + "confidence": 0.768 + }, + { + "text": "supposed", + "start": 142.68, + "end": 142.96, + "confidence": 0.97 + }, + { + "text": "to", + "start": 142.96, + "end": 143.1, + "confidence": 0.996 + }, + { + "text": "be", + "start": 143.1, + "end": 143.2, + "confidence": 0.975 + } + ] + }, + { + "id": 17, + "seek": 11992, + "start": 143.24, + "end": 146.29, + "text": " I propose to be in a wreck of emotions Ready to go 
whenever she let me know", + "tokens": [ + 51524, + 286, + 17421, + 281, + 312, + 294, + 257, + 21478, + 295, + 8462, + 9944, + 281, + 352, + 5699, + 750, + 718, + 385, + 458, + 51684 + ], + "temperature": 0.4, + "avg_logprob": -0.38010619236872745, + "compression_ratio": 1.755458515283843, + "no_speech_prob": 0.7215486168861389, + "confidence": 0.711, + "words": [ + { + "text": "I", + "start": 143.24, + "end": 143.4, + "confidence": 0.432 + }, + { + "text": "propose", + "start": 143.4, + "end": 143.72, + "confidence": 0.301 + }, + { + "text": "to", + "start": 143.72, + "end": 143.9, + "confidence": 0.838 + }, + { + "text": "be", + "start": 143.9, + "end": 144.04, + "confidence": 0.545 + }, + { + "text": "in", + "start": 144.04, + "end": 144.18, + "confidence": 0.741 + }, + { + "text": "a", + "start": 144.18, + "end": 144.3, + "confidence": 0.678 + }, + { + "text": "wreck", + "start": 144.3, + "end": 144.4, + "confidence": 0.392 + }, + { + "text": "of", + "start": 144.4, + "end": 144.54, + "confidence": 0.983 + }, + { + "text": "emotions", + "start": 144.54, + "end": 145.0, + "confidence": 0.916 + }, + { + "text": "Ready", + "start": 145.0, + "end": 145.3, + "confidence": 0.864 + }, + { + "text": "to", + "start": 145.3, + "end": 145.46, + "confidence": 0.994 + }, + { + "text": "go", + "start": 145.46, + "end": 145.6, + "confidence": 0.996 + }, + { + "text": "whenever", + "start": 145.6, + "end": 145.8, + "confidence": 0.844 + }, + { + "text": "she", + "start": 145.8, + "end": 145.96, + "confidence": 0.428 + }, + { + "text": "let", + "start": 145.96, + "end": 146.08, + "confidence": 0.954 + }, + { + "text": "me", + "start": 146.08, + "end": 146.16, + "confidence": 0.999 + }, + { + "text": "know", + "start": 146.16, + "end": 146.29, + "confidence": 0.977 + } + ] + }, + { + "id": 18, + "seek": 11992, + "start": 146.29, + "end": 149.78, + "text": " The road is long so put the pedal into the flow The enemy on my trail, my energy unavailable", + "tokens": [ + 51684, + 
440, + 3060, + 307, + 938, + 370, + 829, + 264, + 19122, + 666, + 264, + 3095, + 440, + 5945, + 322, + 452, + 9924, + 11, + 452, + 2281, + 36541, + 32699, + 51860 + ], + "temperature": 0.4, + "avg_logprob": -0.38010619236872745, + "compression_ratio": 1.755458515283843, + "no_speech_prob": 0.7215486168861389, + "confidence": 0.912, + "words": [ + { + "text": "The", + "start": 146.29, + "end": 146.52, + "confidence": 0.897 + }, + { + "text": "road", + "start": 146.52, + "end": 146.68, + "confidence": 0.986 + }, + { + "text": "is", + "start": 146.68, + "end": 146.78, + "confidence": 0.971 + }, + { + "text": "long", + "start": 146.78, + "end": 146.92, + "confidence": 0.913 + }, + { + "text": "so", + "start": 146.92, + "end": 147.04, + "confidence": 0.67 + }, + { + "text": "put", + "start": 147.04, + "end": 147.16, + "confidence": 0.954 + }, + { + "text": "the", + "start": 147.16, + "end": 147.28, + "confidence": 0.975 + }, + { + "text": "pedal", + "start": 147.28, + "end": 147.42, + "confidence": 0.924 + }, + { + "text": "into", + "start": 147.42, + "end": 147.6, + "confidence": 0.946 + }, + { + "text": "the", + "start": 147.6, + "end": 147.74, + "confidence": 0.992 + }, + { + "text": "flow", + "start": 147.74, + "end": 147.88, + "confidence": 0.844 + }, + { + "text": "The", + "start": 147.88, + "end": 148.08, + "confidence": 0.71 + }, + { + "text": "enemy", + "start": 148.08, + "end": 148.26, + "confidence": 0.887 + }, + { + "text": "on", + "start": 148.26, + "end": 148.38, + "confidence": 0.969 + }, + { + "text": "my", + "start": 148.38, + "end": 148.5, + "confidence": 0.982 + }, + { + "text": "trail,", + "start": 148.5, + "end": 148.64, + "confidence": 0.992 + }, + { + "text": "my", + "start": 148.72, + "end": 148.86, + "confidence": 0.99 + }, + { + "text": "energy", + "start": 148.86, + "end": 149.1, + "confidence": 0.992 + }, + { + "text": "unavailable", + "start": 149.1, + "end": 149.78, + "confidence": 0.875 + } + ] + }, + { + "id": 19, + "seek": 14984, + 
"start": 150.02, + "end": 151.18, + "text": " I'ma tell the master the way go", + "tokens": [ + 50364, + 286, + 478, + 64, + 980, + 264, + 4505, + 264, + 636, + 352, + 50436 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.592, + "words": [ + { + "text": "I'ma", + "start": 150.02, + "end": 150.24, + "confidence": 0.68 + }, + { + "text": "tell", + "start": 150.24, + "end": 150.36, + "confidence": 0.983 + }, + { + "text": "the", + "start": 150.36, + "end": 150.46, + "confidence": 0.248 + }, + { + "text": "master", + "start": 150.46, + "end": 150.68, + "confidence": 0.223 + }, + { + "text": "the", + "start": 150.68, + "end": 150.9, + "confidence": 0.783 + }, + { + "text": "way", + "start": 150.9, + "end": 151.0, + "confidence": 0.991 + }, + { + "text": "go", + "start": 151.0, + "end": 151.18, + "confidence": 0.673 + } + ] + }, + { + "id": 20, + "seek": 14984, + "start": 151.2, + "end": 152.54, + "text": " Hey, when I'm flat on my track to the top", + "tokens": [ + 50436, + 1911, + 11, + 562, + 286, + 478, + 4962, + 322, + 452, + 2837, + 281, + 264, + 1192, + 50500 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.589, + "words": [ + { + "text": "Hey,", + "start": 151.2, + "end": 151.4, + "confidence": 0.215 + }, + { + "text": "when", + "start": 151.46, + "end": 151.56, + "confidence": 0.811 + }, + { + "text": "I'm", + "start": 151.56, + "end": 151.72, + "confidence": 0.523 + }, + { + "text": "flat", + "start": 151.72, + "end": 151.8, + "confidence": 0.241 + }, + { + "text": "on", + "start": 151.8, + "end": 151.9, + "confidence": 0.636 + }, + { + "text": "my", + "start": 151.9, + "end": 152.02, + "confidence": 0.961 + }, + { + "text": "track", + "start": 152.02, + "end": 152.16, + "confidence": 0.482 + }, + { + "text": "to", + "start": 
152.16, + "end": 152.32, + "confidence": 0.874 + }, + { + "text": "the", + "start": 152.32, + "end": 152.42, + "confidence": 0.996 + }, + { + "text": "top", + "start": 152.42, + "end": 152.54, + "confidence": 0.998 + } + ] + }, + { + "id": 21, + "seek": 14984, + "start": 152.54, + "end": 154.34, + "text": " I been out of shape, thinking that I'm a box of an astronaut", + "tokens": [ + 50500, + 286, + 668, + 484, + 295, + 3909, + 11, + 1953, + 300, + 286, + 478, + 257, + 2424, + 295, + 364, + 18516, + 50592 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.725, + "words": [ + { + "text": "I", + "start": 152.54, + "end": 152.7, + "confidence": 0.988 + }, + { + "text": "been", + "start": 152.7, + "end": 152.76, + "confidence": 0.617 + }, + { + "text": "out", + "start": 152.76, + "end": 152.9, + "confidence": 0.679 + }, + { + "text": "of", + "start": 152.9, + "end": 153.02, + "confidence": 0.959 + }, + { + "text": "shape,", + "start": 153.02, + "end": 153.1, + "confidence": 0.999 + }, + { + "text": "thinking", + "start": 153.16, + "end": 153.32, + "confidence": 0.667 + }, + { + "text": "that", + "start": 153.32, + "end": 153.46, + "confidence": 0.261 + }, + { + "text": "I'm", + "start": 153.46, + "end": 153.62, + "confidence": 0.736 + }, + { + "text": "a", + "start": 153.62, + "end": 153.72, + "confidence": 0.725 + }, + { + "text": "box", + "start": 153.72, + "end": 153.76, + "confidence": 0.878 + }, + { + "text": "of", + "start": 153.76, + "end": 153.9, + "confidence": 0.483 + }, + { + "text": "an", + "start": 153.9, + "end": 154.04, + "confidence": 0.986 + }, + { + "text": "astronaut", + "start": 154.04, + "end": 154.34, + "confidence": 0.984 + } + ] + }, + { + "id": 22, + "seek": 14984, + "start": 154.5, + "end": 156.42, + "text": " Blasted off the planet, rocked the cars, catastrophic", + "tokens": [ + 50592, + 2177, + 34440, + 766, + 264, + 5054, + 
11, + 3727, + 292, + 264, + 5163, + 11, + 34915, + 50692 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.742, + "words": [ + { + "text": "Blasted", + "start": 154.5, + "end": 154.82, + "confidence": 0.817 + }, + { + "text": "off", + "start": 154.82, + "end": 155.02, + "confidence": 0.981 + }, + { + "text": "the", + "start": 155.02, + "end": 155.18, + "confidence": 0.95 + }, + { + "text": "planet,", + "start": 155.18, + "end": 155.36, + "confidence": 0.994 + }, + { + "text": "rocked", + "start": 155.44, + "end": 155.66, + "confidence": 0.691 + }, + { + "text": "the", + "start": 155.66, + "end": 155.74, + "confidence": 0.801 + }, + { + "text": "cars,", + "start": 155.74, + "end": 155.9, + "confidence": 0.308 + }, + { + "text": "catastrophic", + "start": 155.9, + "end": 156.42, + "confidence": 0.693 + } + ] + }, + { + "id": 23, + "seek": 14984, + "start": 156.44, + "end": 158.18, + "text": " And it matters more because I had it, now I had it", + "tokens": [ + 50692, + 400, + 309, + 7001, + 544, + 570, + 286, + 632, + 309, + 11, + 586, + 286, + 632, + 309, + 50777 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.787, + "words": [ + { + "text": "And", + "start": 156.44, + "end": 156.64, + "confidence": 0.858 + }, + { + "text": "it", + "start": 156.64, + "end": 156.78, + "confidence": 0.678 + }, + { + "text": "matters", + "start": 156.78, + "end": 156.96, + "confidence": 0.996 + }, + { + "text": "more", + "start": 156.96, + "end": 157.18, + "confidence": 0.981 + }, + { + "text": "because", + "start": 157.18, + "end": 157.42, + "confidence": 0.749 + }, + { + "text": "I", + "start": 157.42, + "end": 157.56, + "confidence": 0.703 + }, + { + "text": "had", + "start": 157.56, + "end": 157.66, + "confidence": 0.951 + }, + { + "text": "it,", + 
"start": 157.66, + "end": 157.8, + "confidence": 0.986 + }, + { + "text": "now", + "start": 157.8, + "end": 157.94, + "confidence": 0.56 + }, + { + "text": "I", + "start": 157.94, + "end": 158.06, + "confidence": 0.754 + }, + { + "text": "had", + "start": 158.06, + "end": 158.12, + "confidence": 0.937 + }, + { + "text": "it", + "start": 158.12, + "end": 158.18, + "confidence": 0.51 + } + ] + }, + { + "id": 24, + "seek": 14984, + "start": 158.18, + "end": 159.84, + "text": " I thought about wreaking havoc on an opposition", + "tokens": [ + 50777, + 286, + 1194, + 466, + 46674, + 2456, + 47367, + 322, + 364, + 13504, + 50864 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.959, + "words": [ + { + "text": "I", + "start": 158.18, + "end": 158.24, + "confidence": 0.915 + }, + { + "text": "thought", + "start": 158.24, + "end": 158.38, + "confidence": 0.982 + }, + { + "text": "about", + "start": 158.38, + "end": 158.58, + "confidence": 0.981 + }, + { + "text": "wreaking", + "start": 158.58, + "end": 158.86, + "confidence": 0.978 + }, + { + "text": "havoc", + "start": 158.86, + "end": 159.08, + "confidence": 1.0 + }, + { + "text": "on", + "start": 159.08, + "end": 159.3, + "confidence": 0.911 + }, + { + "text": "an", + "start": 159.3, + "end": 159.46, + "confidence": 0.909 + }, + { + "text": "opposition", + "start": 159.46, + "end": 159.84, + "confidence": 0.984 + } + ] + }, + { + "id": 25, + "seek": 14984, + "start": 159.86, + "end": 161.56, + "text": " Kinda shockin', they want it static, with precision", + "tokens": [ + 50864, + 35553, + 5588, + 259, + 6098, + 436, + 528, + 309, + 13437, + 11, + 365, + 18356, + 50952 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.572, + "words": [ + { + "text": "Kinda", + "start": 159.86, + "end": 
160.1, + "confidence": 0.654 + }, + { + "text": "shockin',", + "start": 160.1, + "end": 160.48, + "confidence": 0.381 + }, + { + "text": "they", + "start": 160.48, + "end": 160.6, + "confidence": 0.305 + }, + { + "text": "want", + "start": 160.6, + "end": 160.74, + "confidence": 0.57 + }, + { + "text": "it", + "start": 160.74, + "end": 160.88, + "confidence": 0.695 + }, + { + "text": "static,", + "start": 160.88, + "end": 161.02, + "confidence": 0.986 + }, + { + "text": "with", + "start": 161.14, + "end": 161.28, + "confidence": 0.733 + }, + { + "text": "precision", + "start": 161.28, + "end": 161.56, + "confidence": 0.794 + } + ] + }, + { + "id": 26, + "seek": 14984, + "start": 161.62, + "end": 163.36, + "text": " I'm automatic, quarterback, I ain't talkin' second", + "tokens": [ + 50952, + 286, + 478, + 12509, + 11, + 31952, + 11, + 286, + 7862, + 380, + 39243, + 6, + 1150, + 51043 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.812, + "words": [ + { + "text": "I'm", + "start": 161.62, + "end": 161.88, + "confidence": 0.963 + }, + { + "text": "automatic,", + "start": 161.88, + "end": 162.18, + "confidence": 0.961 + }, + { + "text": "quarterback,", + "start": 162.56, + "end": 162.58, + "confidence": 0.667 + }, + { + "text": "I", + "start": 162.66, + "end": 162.78, + "confidence": 0.727 + }, + { + "text": "ain't", + "start": 162.78, + "end": 162.9, + "confidence": 0.984 + }, + { + "text": "talkin'", + "start": 162.9, + "end": 163.2, + "confidence": 0.747 + }, + { + "text": "second", + "start": 163.2, + "end": 163.36, + "confidence": 0.536 + } + ] + }, + { + "id": 27, + "seek": 14984, + "start": 163.36, + "end": 165.2, + "text": " Pack it, pack it up, I don't panic, better batter up", + "tokens": [ + 51043, + 18466, + 309, + 11, + 2844, + 309, + 493, + 11, + 286, + 500, + 380, + 14783, + 11, + 1101, + 4220, + 493, + 51132 + ], + "temperature": 0.4, 
+ "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.789, + "words": [ + { + "text": "Pack", + "start": 163.36, + "end": 163.6, + "confidence": 0.939 + }, + { + "text": "it,", + "start": 163.6, + "end": 163.68, + "confidence": 0.893 + }, + { + "text": "pack", + "start": 163.7, + "end": 163.88, + "confidence": 0.962 + }, + { + "text": "it", + "start": 163.88, + "end": 164.02, + "confidence": 0.998 + }, + { + "text": "up,", + "start": 164.02, + "end": 164.12, + "confidence": 0.979 + }, + { + "text": "I", + "start": 164.12, + "end": 164.22, + "confidence": 0.974 + }, + { + "text": "don't", + "start": 164.22, + "end": 164.36, + "confidence": 0.888 + }, + { + "text": "panic,", + "start": 164.36, + "end": 164.56, + "confidence": 0.999 + }, + { + "text": "better", + "start": 164.62, + "end": 164.88, + "confidence": 0.568 + }, + { + "text": "batter", + "start": 164.88, + "end": 165.02, + "confidence": 0.176 + }, + { + "text": "up", + "start": 165.02, + "end": 165.2, + "confidence": 0.963 + } + ] + }, + { + "id": 28, + "seek": 14984, + "start": 165.2, + "end": 166.76, + "text": " Who the baddest, it don't matter cause we is your", + "tokens": [ + 51132, + 2102, + 264, + 1578, + 23748, + 11, + 309, + 500, + 380, + 1871, + 3082, + 321, + 307, + 428, + 51210 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.714, + "words": [ + { + "text": "Who", + "start": 165.2, + "end": 165.4, + "confidence": 0.994 + }, + { + "text": "the", + "start": 165.4, + "end": 165.54, + "confidence": 0.954 + }, + { + "text": "baddest,", + "start": 165.54, + "end": 165.78, + "confidence": 0.833 + }, + { + "text": "it", + "start": 165.8, + "end": 165.9, + "confidence": 0.993 + }, + { + "text": "don't", + "start": 165.9, + "end": 166.04, + "confidence": 0.995 + }, + { + "text": "matter", + "start": 
166.04, + "end": 166.18, + "confidence": 0.999 + }, + { + "text": "cause", + "start": 166.18, + "end": 166.38, + "confidence": 0.235 + }, + { + "text": "we", + "start": 166.38, + "end": 166.56, + "confidence": 0.956 + }, + { + "text": "is", + "start": 166.56, + "end": 166.64, + "confidence": 0.208 + }, + { + "text": "your", + "start": 166.64, + "end": 166.76, + "confidence": 0.577 + } + ] + }, + { + "id": 29, + "seek": 14984, + "start": 166.86, + "end": 169.82, + "text": " Everybody wants to be my enemy", + "tokens": [ + 51210, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 51402 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.976, + "words": [ + { + "text": "Everybody", + "start": 166.86, + "end": 167.44, + "confidence": 0.922 + }, + { + "text": "wants", + "start": 167.44, + "end": 167.82, + "confidence": 0.985 + }, + { + "text": "to", + "start": 167.82, + "end": 168.0, + "confidence": 0.998 + }, + { + "text": "be", + "start": 168.0, + "end": 168.16, + "confidence": 0.998 + }, + { + "text": "my", + "start": 168.16, + "end": 168.44, + "confidence": 0.993 + }, + { + "text": "enemy", + "start": 168.44, + "end": 169.82, + "confidence": 0.965 + } + ] + }, + { + "id": 30, + "seek": 14984, + "start": 170.62, + "end": 172.24, + "text": " Spare the sympathy", + "tokens": [ + 51402, + 1738, + 543, + 264, + 33240, + 51522 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.815, + "words": [ + { + "text": "Spare", + "start": 170.62, + "end": 171.28, + "confidence": 0.711 + }, + { + "text": "the", + "start": 171.28, + "end": 171.54, + "confidence": 0.974 + }, + { + "text": "sympathy", + "start": 171.54, + "end": 172.24, + "confidence": 0.896 + } + ] + }, + { + "id": 31, + "seek": 14984, + "start": 173.1, + "end": 176.24, + "text": " Everybody 
wants to be my enemy", + "tokens": [ + 51522, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 51720 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.984, + "words": [ + { + "text": "Everybody", + "start": 173.1, + "end": 173.7, + "confidence": 0.994 + }, + { + "text": "wants", + "start": 173.7, + "end": 174.06, + "confidence": 0.994 + }, + { + "text": "to", + "start": 174.06, + "end": 174.36, + "confidence": 0.999 + }, + { + "text": "be", + "start": 174.36, + "end": 174.72, + "confidence": 0.999 + }, + { + "text": "my", + "start": 174.72, + "end": 175.46, + "confidence": 0.923 + }, + { + "text": "enemy", + "start": 175.46, + "end": 176.24, + "confidence": 0.998 + } + ] + }, + { + "id": 32, + "seek": 14984, + "start": 176.86, + "end": 178.48, + "text": " Oh, the misery", + "tokens": [ + 51720, + 876, + 11, + 264, + 32309, + 51834 + ], + "temperature": 0.4, + "avg_logprob": -0.45425062569958424, + "compression_ratio": 1.7374631268436578, + "no_speech_prob": 0.7155851125717163, + "confidence": 0.879, + "words": [ + { + "text": "Oh,", + "start": 176.86, + "end": 177.5, + "confidence": 0.692 + }, + { + "text": "the", + "start": 177.56, + "end": 177.76, + "confidence": 0.984 + }, + { + "text": "misery", + "start": 177.76, + "end": 178.48, + "confidence": 0.999 + } + ] + }, + { + "id": 33, + "seek": 17924, + "start": 179.3, + "end": 181.62, + "text": " Everybody wants to be my enemy", + "tokens": [ + 50364, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 50514 + ], + "temperature": 0.4, + "avg_logprob": -0.35596295406943873, + "compression_ratio": 1.8440366972477065, + "no_speech_prob": 0.4185682237148285, + "confidence": 0.949, + "words": [ + { + "text": "Everybody", + "start": 179.3, + "end": 179.92, + "confidence": 0.776 + }, + { + "text": "wants", + "start": 179.92, + "end": 180.3, + "confidence": 0.98 + }, + { + "text": "to", + "start": 180.3, + "end": 
180.48, + "confidence": 0.997 + }, + { + "text": "be", + "start": 180.48, + "end": 180.62, + "confidence": 0.998 + }, + { + "text": "my", + "start": 180.62, + "end": 180.92, + "confidence": 0.992 + }, + { + "text": "enemy", + "start": 180.92, + "end": 181.62, + "confidence": 0.973 + } + ] + }, + { + "id": 34, + "seek": 17924, + "start": 183.08, + "end": 184.64, + "text": " Spell the sympathy", + "tokens": [ + 50514, + 3550, + 285, + 264, + 33240, + 50664 + ], + "temperature": 0.4, + "avg_logprob": -0.35596295406943873, + "compression_ratio": 1.8440366972477065, + "no_speech_prob": 0.4185682237148285, + "confidence": 0.56, + "words": [ + { + "text": "Spell", + "start": 183.08, + "end": 183.78, + "confidence": 0.345 + }, + { + "text": "the", + "start": 183.78, + "end": 184.08, + "confidence": 0.926 + }, + { + "text": "sympathy", + "start": 184.08, + "end": 184.64, + "confidence": 0.892 + } + ] + }, + { + "id": 35, + "seek": 17924, + "start": 185.5, + "end": 188.48, + "text": " Everybody wants to be my enemy", + "tokens": [ + 50664, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 50814 + ], + "temperature": 0.4, + "avg_logprob": -0.35596295406943873, + "compression_ratio": 1.8440366972477065, + "no_speech_prob": 0.4185682237148285, + "confidence": 0.961, + "words": [ + { + "text": "Everybody", + "start": 185.5, + "end": 186.16, + "confidence": 0.99 + }, + { + "text": "wants", + "start": 186.16, + "end": 186.52, + "confidence": 0.988 + }, + { + "text": "to", + "start": 186.52, + "end": 186.76, + "confidence": 0.999 + }, + { + "text": "be", + "start": 186.76, + "end": 187.28, + "confidence": 0.999 + }, + { + "text": "my", + "start": 187.28, + "end": 187.94, + "confidence": 0.811 + }, + { + "text": "enemy", + "start": 187.94, + "end": 188.48, + "confidence": 0.992 + } + ] + }, + { + "id": 36, + "seek": 17924, + "start": 188.9, + "end": 192.06, + "text": " Everywhere I swear I'll never be a saint", + "tokens": [ + 50814, + 37322, + 286, + 11902, + 286, + 603, + 1128, + 
312, + 257, + 28374, + 51014 + ], + "temperature": 0.4, + "avg_logprob": -0.35596295406943873, + "compression_ratio": 1.8440366972477065, + "no_speech_prob": 0.4185682237148285, + "confidence": 0.558, + "words": [ + { + "text": "Everywhere", + "start": 188.9, + "end": 189.58, + "confidence": 0.045 + }, + { + "text": "I", + "start": 189.58, + "end": 190.14, + "confidence": 0.402 + }, + { + "text": "swear", + "start": 190.14, + "end": 190.56, + "confidence": 0.99 + }, + { + "text": "I'll", + "start": 190.56, + "end": 190.96, + "confidence": 0.744 + }, + { + "text": "never", + "start": 190.96, + "end": 191.32, + "confidence": 0.996 + }, + { + "text": "be", + "start": 191.32, + "end": 191.68, + "confidence": 0.995 + }, + { + "text": "a", + "start": 191.68, + "end": 191.9, + "confidence": 0.61 + }, + { + "text": "saint", + "start": 191.9, + "end": 192.06, + "confidence": 0.87 + } + ] + }, + { + "id": 37, + "seek": 17924, + "start": 192.1, + "end": 194.78, + "text": " Nowhere my enemy", + "tokens": [ + 51014, + 823, + 6703, + 452, + 5945, + 51164 + ], + "temperature": 0.4, + "avg_logprob": -0.35596295406943873, + "compression_ratio": 1.8440366972477065, + "no_speech_prob": 0.4185682237148285, + "confidence": 0.679, + "words": [ + { + "text": "Nowhere", + "start": 192.1, + "end": 193.18, + "confidence": 0.658 + }, + { + "text": "my", + "start": 193.18, + "end": 194.08, + "confidence": 0.502 + }, + { + "text": "enemy", + "start": 194.08, + "end": 194.78, + "confidence": 0.979 + } + ] + }, + { + "id": 38, + "seek": 17924, + "start": 195.14, + "end": 198.36, + "text": " Everywhere I swear I'll never be a saint", + "tokens": [ + 51164, + 37322, + 286, + 11902, + 286, + 603, + 1128, + 312, + 257, + 28374, + 51314 + ], + "temperature": 0.4, + "avg_logprob": -0.35596295406943873, + "compression_ratio": 1.8440366972477065, + "no_speech_prob": 0.4185682237148285, + "confidence": 0.994, + "words": [ + { + "text": "Everywhere", + "start": 195.14, + "end": 196.06, + "confidence": 
0.973 + }, + { + "text": "I", + "start": 196.06, + "end": 196.5, + "confidence": 0.994 + }, + { + "text": "swear", + "start": 196.5, + "end": 196.78, + "confidence": 0.997 + }, + { + "text": "I'll", + "start": 196.78, + "end": 197.34, + "confidence": 0.995 + }, + { + "text": "never", + "start": 197.34, + "end": 197.56, + "confidence": 0.999 + }, + { + "text": "be", + "start": 197.56, + "end": 197.92, + "confidence": 0.999 + }, + { + "text": "a", + "start": 197.92, + "end": 198.12, + "confidence": 0.998 + }, + { + "text": "saint", + "start": 198.12, + "end": 198.36, + "confidence": 0.993 + } + ] + }, + { + "id": 39, + "seek": 17924, + "start": 198.5, + "end": 199.44, + "text": " Look out for yourself", + "tokens": [ + 51314, + 2053, + 484, + 337, + 1803, + 51414 + ], + "temperature": 0.4, + "avg_logprob": -0.35596295406943873, + "compression_ratio": 1.8440366972477065, + "no_speech_prob": 0.4185682237148285, + "confidence": 0.754, + "words": [ + { + "text": "Look", + "start": 198.5, + "end": 198.74, + "confidence": 0.356 + }, + { + "text": "out", + "start": 198.74, + "end": 198.92, + "confidence": 0.98 + }, + { + "text": "for", + "start": 198.92, + "end": 199.12, + "confidence": 0.97 + }, + { + "text": "yourself", + "start": 199.12, + "end": 199.44, + "confidence": 0.956 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/nocond_music.mp4.words.json b/tests/expected/corner_cases/nocond_music.mp4.words.json new file mode 100644 index 0000000000000000000000000000000000000000..9647c5b07f285b8445d88483e2c9bd612e3d7413 --- /dev/null +++ b/tests/expected/corner_cases/nocond_music.mp4.words.json @@ -0,0 +1,2750 @@ +{ + "text": " I Oh, the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself But I'm ready Your words up on the wall as you're praying for my phone And the laughter in the holes and the names that I've been called I 
stack it in my mind and I'm waiting for the time When I show you what it's like to be worse but in the mind Tell you you're the greatest But once you turn they hate us Oh, the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself Look, okay I'm hoping that somebody pray for me I'm praying that somebody hold for me. I'm staying where nobody's supposed to be. I propose to be in a wreck of emotions. Ready to go whenever you let me know. The road is long, so put the pedal into the flow. The enemy on my trail, my energy unavailable. I'ma tell them I said away, go away. When I'm plotting, I'ma drive to the top. I've been out of shape, thinking out of the box. I'm an astronaut, blasted off the planet. Rock the cars, catastrophic, and it matters more because I had it now. Had I thought about wreaking havoc on an opposition. Kind of shocking, they want it static. With precision, I'm automatic. Quarterback, I ain't talking second. Pack it, pack it up. I don't panic, better, better. Up who the baddest. it don't matter cause we is your enemy. 
I swear I'll never be insane You gotta be insane", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 2.24, + "end": 2.36, + "text": " I", + "tokens": [ + 50364, + 286, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.9355003237724304, + "compression_ratio": 0.1111111111111111, + "no_speech_prob": 0.7811808586120605, + "confidence": 0.032, + "words": [ + { + "text": "I", + "start": 2.24, + "end": 2.36, + "confidence": 0.032 + } + ] + }, + { + "id": 1, + "seek": 6000, + "start": 60.0, + "end": 69.92, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 50364, + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 50866 + ], + "temperature": 0.0, + "avg_logprob": -0.45629865472966974, + "compression_ratio": 1.62, + "no_speech_prob": 0.8699213266372681, + "confidence": 0.669, + "words": [ + { + "text": "Oh,", + "start": 60.0, + "end": 63.4, + "confidence": 0.084 + }, + { + "text": "the", + "start": 65.24, + "end": 65.5, + "confidence": 0.797 + }, + { + "text": "misery", + "start": 65.5, + "end": 66.3, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 66.3, + "end": 67.7, + "confidence": 0.43 + }, + { + "text": "wants", + "start": 67.7, + "end": 68.08, + "confidence": 0.983 + }, + { + "text": "to", + "start": 68.08, + "end": 68.26, + "confidence": 0.993 + }, + { + "text": "be", + "start": 68.26, + "end": 68.44, + "confidence": 0.996 + }, + { + "text": "my", + "start": 68.44, + "end": 68.74, + "confidence": 0.983 + }, + { + "text": "enemy", + "start": 68.74, + "end": 69.92, + "confidence": 0.984 + } + ] + }, + { + "id": 2, + "seek": 6000, + "start": 70.88, + "end": 76.42, + "text": " Spare the sympathy Everybody wants to be my enemy", + "tokens": [ + 50866, + 1738, + 543, + 264, + 33240, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 51182 + ], + "temperature": 0.0, + "avg_logprob": -0.45629865472966974, + "compression_ratio": 1.62, + "no_speech_prob": 0.8699213266372681, + "confidence": 0.934, + 
"words": [ + { + "text": "Spare", + "start": 70.88, + "end": 71.54, + "confidence": 0.814 + }, + { + "text": "the", + "start": 71.54, + "end": 71.8, + "confidence": 0.968 + }, + { + "text": "sympathy", + "start": 71.8, + "end": 72.64, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 72.64, + "end": 73.96, + "confidence": 0.963 + }, + { + "text": "wants", + "start": 73.96, + "end": 74.3, + "confidence": 0.994 + }, + { + "text": "to", + "start": 74.3, + "end": 74.64, + "confidence": 0.999 + }, + { + "text": "be", + "start": 74.64, + "end": 75.16, + "confidence": 0.999 + }, + { + "text": "my", + "start": 75.16, + "end": 75.72, + "confidence": 0.837 + }, + { + "text": "enemy", + "start": 75.72, + "end": 76.42, + "confidence": 0.997 + } + ] + }, + { + "id": 3, + "seek": 6000, + "start": 79.92, + "end": 82.68, + "text": " Look out for yourself My enemy", + "tokens": [ + 51182, + 2053, + 484, + 337, + 1803, + 1222, + 5945, + 51496 + ], + "temperature": 0.0, + "avg_logprob": -0.45629865472966974, + "compression_ratio": 1.62, + "no_speech_prob": 0.8699213266372681, + "confidence": 0.774, + "words": [ + { + "text": "Look", + "start": 79.92, + "end": 80.24, + "confidence": 0.43 + }, + { + "text": "out", + "start": 80.24, + "end": 80.46, + "confidence": 0.99 + }, + { + "text": "for", + "start": 80.46, + "end": 80.68, + "confidence": 0.989 + }, + { + "text": "yourself", + "start": 80.68, + "end": 81.14, + "confidence": 0.97 + }, + { + "text": "My", + "start": 81.14, + "end": 81.94, + "confidence": 0.536 + }, + { + "text": "enemy", + "start": 81.94, + "end": 82.68, + "confidence": 0.98 + } + ] + }, + { + "id": 4, + "seek": 6000, + "start": 86.14, + "end": 88.48, + "text": " Look out for yourself But I'm ready", + "tokens": [ + 51496, + 2053, + 484, + 337, + 1803, + 583, + 286, + 478, + 1919, + 51792 + ], + "temperature": 0.0, + "avg_logprob": -0.45629865472966974, + "compression_ratio": 1.62, + "no_speech_prob": 0.8699213266372681, + "confidence": 0.928, + 
"words": [ + { + "text": "Look", + "start": 86.14, + "end": 86.46, + "confidence": 0.748 + }, + { + "text": "out", + "start": 86.46, + "end": 86.68, + "confidence": 0.988 + }, + { + "text": "for", + "start": 86.68, + "end": 86.88, + "confidence": 0.998 + }, + { + "text": "yourself", + "start": 86.88, + "end": 87.56, + "confidence": 0.999 + }, + { + "text": "But", + "start": 87.56, + "end": 87.92, + "confidence": 0.766 + }, + { + "text": "I'm", + "start": 87.92, + "end": 88.14, + "confidence": 0.989 + }, + { + "text": "ready", + "start": 88.14, + "end": 88.48, + "confidence": 0.995 + } + ] + }, + { + "id": 5, + "seek": 8856, + "start": 88.64, + "end": 91.7, + "text": " Your words up on the wall as you're praying for my phone", + "tokens": [ + 50364, + 2260, + 2283, + 493, + 322, + 264, + 2929, + 382, + 291, + 434, + 15611, + 337, + 452, + 2593, + 50520 + ], + "temperature": 0.0, + "avg_logprob": -0.2874426958037586, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7892544865608215, + "confidence": 0.82, + "words": [ + { + "text": "Your", + "start": 88.64, + "end": 88.86, + "confidence": 0.548 + }, + { + "text": "words", + "start": 88.86, + "end": 89.18, + "confidence": 0.92 + }, + { + "text": "up", + "start": 89.18, + "end": 89.48, + "confidence": 0.964 + }, + { + "text": "on", + "start": 89.48, + "end": 89.62, + "confidence": 0.995 + }, + { + "text": "the", + "start": 89.62, + "end": 89.78, + "confidence": 0.991 + }, + { + "text": "wall", + "start": 89.78, + "end": 90.14, + "confidence": 0.88 + }, + { + "text": "as", + "start": 90.14, + "end": 90.32, + "confidence": 0.402 + }, + { + "text": "you're", + "start": 90.32, + "end": 90.46, + "confidence": 0.828 + }, + { + "text": "praying", + "start": 90.46, + "end": 90.86, + "confidence": 0.967 + }, + { + "text": "for", + "start": 90.86, + "end": 91.1, + "confidence": 0.97 + }, + { + "text": "my", + "start": 91.1, + "end": 91.28, + "confidence": 0.989 + }, + { + "text": "phone", + "start": 91.28, + 
"end": 91.7, + "confidence": 0.702 + } + ] + }, + { + "id": 6, + "seek": 8856, + "start": 91.78, + "end": 94.74, + "text": " And the laughter in the holes and the names that I've been called", + "tokens": [ + 50520, + 400, + 264, + 13092, + 294, + 264, + 8118, + 293, + 264, + 5288, + 300, + 286, + 600, + 668, + 1219, + 50680 + ], + "temperature": 0.0, + "avg_logprob": -0.2874426958037586, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7892544865608215, + "confidence": 0.884, + "words": [ + { + "text": "And", + "start": 91.78, + "end": 91.94, + "confidence": 0.844 + }, + { + "text": "the", + "start": 91.94, + "end": 92.12, + "confidence": 0.987 + }, + { + "text": "laughter", + "start": 92.12, + "end": 92.5, + "confidence": 0.994 + }, + { + "text": "in", + "start": 92.5, + "end": 92.72, + "confidence": 0.85 + }, + { + "text": "the", + "start": 92.72, + "end": 92.92, + "confidence": 0.997 + }, + { + "text": "holes", + "start": 92.92, + "end": 93.28, + "confidence": 0.607 + }, + { + "text": "and", + "start": 93.28, + "end": 93.48, + "confidence": 0.595 + }, + { + "text": "the", + "start": 93.48, + "end": 93.6, + "confidence": 0.991 + }, + { + "text": "names", + "start": 93.6, + "end": 93.84, + "confidence": 0.991 + }, + { + "text": "that", + "start": 93.84, + "end": 94.14, + "confidence": 0.957 + }, + { + "text": "I've", + "start": 94.14, + "end": 94.34, + "confidence": 0.914 + }, + { + "text": "been", + "start": 94.34, + "end": 94.46, + "confidence": 0.985 + }, + { + "text": "called", + "start": 94.46, + "end": 94.74, + "confidence": 0.912 + } + ] + }, + { + "id": 7, + "seek": 8856, + "start": 95.0, + "end": 97.82, + "text": " I stack it in my mind and I'm waiting for the time", + "tokens": [ + 50680, + 286, + 8630, + 309, + 294, + 452, + 1575, + 293, + 286, + 478, + 3806, + 337, + 264, + 565, + 50832 + ], + "temperature": 0.0, + "avg_logprob": -0.2874426958037586, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7892544865608215, + 
"confidence": 0.959, + "words": [ + { + "text": "I", + "start": 95.0, + "end": 95.14, + "confidence": 0.987 + }, + { + "text": "stack", + "start": 95.14, + "end": 95.46, + "confidence": 0.825 + }, + { + "text": "it", + "start": 95.46, + "end": 95.72, + "confidence": 0.995 + }, + { + "text": "in", + "start": 95.72, + "end": 95.8, + "confidence": 0.996 + }, + { + "text": "my", + "start": 95.8, + "end": 96.1, + "confidence": 0.994 + }, + { + "text": "mind", + "start": 96.1, + "end": 96.4, + "confidence": 0.999 + }, + { + "text": "and", + "start": 96.4, + "end": 96.6, + "confidence": 0.759 + }, + { + "text": "I'm", + "start": 96.6, + "end": 96.72, + "confidence": 0.991 + }, + { + "text": "waiting", + "start": 96.72, + "end": 97.14, + "confidence": 0.988 + }, + { + "text": "for", + "start": 97.14, + "end": 97.36, + "confidence": 0.996 + }, + { + "text": "the", + "start": 97.36, + "end": 97.54, + "confidence": 0.996 + }, + { + "text": "time", + "start": 97.54, + "end": 97.82, + "confidence": 0.996 + } + ] + }, + { + "id": 8, + "seek": 8856, + "start": 97.98, + "end": 101.26, + "text": " When I show you what it's like to be worse but in the mind", + "tokens": [ + 50832, + 1133, + 286, + 855, + 291, + 437, + 309, + 311, + 411, + 281, + 312, + 5324, + 457, + 294, + 264, + 1575, + 51000 + ], + "temperature": 0.0, + "avg_logprob": -0.2874426958037586, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7892544865608215, + "confidence": 0.756, + "words": [ + { + "text": "When", + "start": 97.98, + "end": 98.14, + "confidence": 0.922 + }, + { + "text": "I", + "start": 98.14, + "end": 98.32, + "confidence": 0.993 + }, + { + "text": "show", + "start": 98.32, + "end": 98.5, + "confidence": 0.958 + }, + { + "text": "you", + "start": 98.5, + "end": 98.76, + "confidence": 0.994 + }, + { + "text": "what", + "start": 98.76, + "end": 98.94, + "confidence": 0.996 + }, + { + "text": "it's", + "start": 98.94, + "end": 99.24, + "confidence": 0.988 + }, + { + "text": "like", + 
"start": 99.24, + "end": 99.46, + "confidence": 0.998 + }, + { + "text": "to", + "start": 99.46, + "end": 99.7, + "confidence": 0.875 + }, + { + "text": "be", + "start": 99.7, + "end": 99.84, + "confidence": 0.997 + }, + { + "text": "worse", + "start": 99.84, + "end": 100.08, + "confidence": 0.445 + }, + { + "text": "but", + "start": 100.08, + "end": 100.4, + "confidence": 0.199 + }, + { + "text": "in", + "start": 100.4, + "end": 100.58, + "confidence": 0.863 + }, + { + "text": "the", + "start": 100.58, + "end": 100.8, + "confidence": 0.428 + }, + { + "text": "mind", + "start": 100.8, + "end": 101.26, + "confidence": 0.622 + } + ] + }, + { + "id": 9, + "seek": 8856, + "start": 101.34, + "end": 106.84, + "text": " Tell you you're the greatest", + "tokens": [ + 51000, + 5115, + 291, + 291, + 434, + 264, + 6636, + 51292 + ], + "temperature": 0.0, + "avg_logprob": -0.2874426958037586, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7892544865608215, + "confidence": 0.94, + "words": [ + { + "text": "Tell", + "start": 101.34, + "end": 102.22, + "confidence": 0.937 + }, + { + "text": "you", + "start": 102.22, + "end": 102.96, + "confidence": 0.993 + }, + { + "text": "you're", + "start": 102.96, + "end": 103.94, + "confidence": 0.873 + }, + { + "text": "the", + "start": 103.94, + "end": 104.36, + "confidence": 0.985 + }, + { + "text": "greatest", + "start": 104.36, + "end": 106.84, + "confidence": 0.986 + } + ] + }, + { + "id": 10, + "seek": 8856, + "start": 107.2, + "end": 113.02, + "text": " But once you turn they hate us", + "tokens": [ + 51292, + 583, + 1564, + 291, + 1261, + 436, + 4700, + 505, + 51592 + ], + "temperature": 0.0, + "avg_logprob": -0.2874426958037586, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7892544865608215, + "confidence": 0.909, + "words": [ + { + "text": "But", + "start": 107.2, + "end": 107.62, + "confidence": 0.963 + }, + { + "text": "once", + "start": 107.62, + "end": 108.42, + "confidence": 0.786 + }, + { 
+ "text": "you", + "start": 108.42, + "end": 109.28, + "confidence": 0.998 + }, + { + "text": "turn", + "start": 109.28, + "end": 109.86, + "confidence": 0.914 + }, + { + "text": "they", + "start": 109.86, + "end": 110.38, + "confidence": 0.754 + }, + { + "text": "hate", + "start": 110.38, + "end": 111.92, + "confidence": 0.984 + }, + { + "text": "us", + "start": 111.92, + "end": 113.02, + "confidence": 0.999 + } + ] + }, + { + "id": 11, + "seek": 11312, + "start": 113.38, + "end": 119.78, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 50364, + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.27150371839415355, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.865480363368988, + "confidence": 0.804, + "words": [ + { + "text": "Oh,", + "start": 113.38, + "end": 114.98, + "confidence": 0.317 + }, + { + "text": "the", + "start": 115.02, + "end": 115.42, + "confidence": 0.964 + }, + { + "text": "misery", + "start": 115.42, + "end": 116.26, + "confidence": 0.995 + }, + { + "text": "Everybody", + "start": 116.26, + "end": 117.58, + "confidence": 0.482 + }, + { + "text": "wants", + "start": 117.58, + "end": 117.96, + "confidence": 0.986 + }, + { + "text": "to", + "start": 117.96, + "end": 118.14, + "confidence": 0.996 + }, + { + "text": "be", + "start": 118.14, + "end": 118.32, + "confidence": 0.998 + }, + { + "text": "my", + "start": 118.32, + "end": 118.56, + "confidence": 0.993 + }, + { + "text": "enemy", + "start": 118.56, + "end": 119.78, + "confidence": 0.983 + } + ] + }, + { + "id": 12, + "seek": 11312, + "start": 120.7, + "end": 126.28, + "text": " Spare the sympathy Everybody wants to be my enemy", + "tokens": [ + 50714, + 1738, + 543, + 264, + 33240, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.27150371839415355, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 
0.865480363368988, + "confidence": 0.874, + "words": [ + { + "text": "Spare", + "start": 120.7, + "end": 121.4, + "confidence": 0.576 + }, + { + "text": "the", + "start": 121.4, + "end": 121.7, + "confidence": 0.985 + }, + { + "text": "sympathy", + "start": 121.7, + "end": 122.5, + "confidence": 0.994 + }, + { + "text": "Everybody", + "start": 122.5, + "end": 123.84, + "confidence": 0.969 + }, + { + "text": "wants", + "start": 123.84, + "end": 124.18, + "confidence": 0.994 + }, + { + "text": "to", + "start": 124.18, + "end": 124.42, + "confidence": 0.999 + }, + { + "text": "be", + "start": 124.42, + "end": 125.24, + "confidence": 0.999 + }, + { + "text": "my", + "start": 125.24, + "end": 125.62, + "confidence": 0.836 + }, + { + "text": "enemy", + "start": 125.62, + "end": 126.28, + "confidence": 0.998 + } + ] + }, + { + "id": 13, + "seek": 11312, + "start": 126.62, + "end": 132.44, + "text": " Look out for yourself My enemy", + "tokens": [ + 51014, + 2053, + 484, + 337, + 1803, + 1222, + 5945, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.27150371839415355, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.865480363368988, + "confidence": 0.794, + "words": [ + { + "text": "Look", + "start": 126.62, + "end": 130.12, + "confidence": 0.593 + }, + { + "text": "out", + "start": 130.12, + "end": 130.32, + "confidence": 0.893 + }, + { + "text": "for", + "start": 130.32, + "end": 130.48, + "confidence": 0.978 + }, + { + "text": "yourself", + "start": 130.48, + "end": 130.94, + "confidence": 0.988 + }, + { + "text": "My", + "start": 130.94, + "end": 131.78, + "confidence": 0.496 + }, + { + "text": "enemy", + "start": 131.78, + "end": 132.44, + "confidence": 0.989 + } + ] + }, + { + "id": 14, + "seek": 11312, + "start": 133.72, + "end": 137.14, + "text": " Look out for yourself", + "tokens": [ + 51314, + 2053, + 484, + 337, + 1803, + 51564 + ], + "temperature": 0.0, + "avg_logprob": -0.27150371839415355, + "compression_ratio": 1.6610169491525424, + 
"no_speech_prob": 0.865480363368988, + "confidence": 0.97, + "words": [ + { + "text": "Look", + "start": 133.72, + "end": 134.66, + "confidence": 0.906 + }, + { + "text": "out", + "start": 134.66, + "end": 136.54, + "confidence": 0.978 + }, + { + "text": "for", + "start": 136.54, + "end": 136.74, + "confidence": 0.999 + }, + { + "text": "yourself", + "start": 136.74, + "end": 137.14, + "confidence": 0.999 + } + ] + }, + { + "id": 15, + "seek": 11312, + "start": 137.6, + "end": 140.24, + "text": " Look, okay I'm hoping that somebody pray for me", + "tokens": [ + 51564, + 2053, + 11, + 1392, + 286, + 478, + 7159, + 300, + 2618, + 3690, + 337, + 385, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.27150371839415355, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.865480363368988, + "confidence": 0.786, + "words": [ + { + "text": "Look,", + "start": 137.6, + "end": 137.88, + "confidence": 0.728 + }, + { + "text": "okay", + "start": 138.4, + "end": 138.6, + "confidence": 0.728 + }, + { + "text": "I'm", + "start": 138.6, + "end": 138.94, + "confidence": 0.699 + }, + { + "text": "hoping", + "start": 138.94, + "end": 139.16, + "confidence": 0.502 + }, + { + "text": "that", + "start": 139.16, + "end": 139.34, + "confidence": 0.959 + }, + { + "text": "somebody", + "start": 139.34, + "end": 139.58, + "confidence": 0.981 + }, + { + "text": "pray", + "start": 139.58, + "end": 139.84, + "confidence": 0.821 + }, + { + "text": "for", + "start": 139.84, + "end": 140.02, + "confidence": 0.901 + }, + { + "text": "me", + "start": 140.02, + "end": 140.24, + "confidence": 0.999 + } + ] + }, + { + "id": 16, + "seek": 14012, + "start": 140.34, + "end": 141.7, + "text": " I'm praying that somebody hold for me.", + "tokens": [ + 50364, + 286, + 478, + 15611, + 300, + 2618, + 1797, + 337, + 385, + 13, + 50439 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + 
"confidence": 0.794, + "words": [ + { + "text": "I'm", + "start": 140.34, + "end": 140.44, + "confidence": 0.819 + }, + { + "text": "praying", + "start": 140.44, + "end": 140.66, + "confidence": 0.694 + }, + { + "text": "that", + "start": 140.66, + "end": 140.84, + "confidence": 0.787 + }, + { + "text": "somebody", + "start": 140.84, + "end": 141.1, + "confidence": 0.985 + }, + { + "text": "hold", + "start": 141.1, + "end": 141.36, + "confidence": 0.442 + }, + { + "text": "for", + "start": 141.36, + "end": 141.52, + "confidence": 0.992 + }, + { + "text": "me.", + "start": 141.52, + "end": 141.7, + "confidence": 0.999 + } + ] + }, + { + "id": 17, + "seek": 14012, + "start": 141.76, + "end": 143.2, + "text": " I'm staying where nobody's supposed to be.", + "tokens": [ + 50439, + 286, + 478, + 7939, + 689, + 5079, + 311, + 3442, + 281, + 312, + 13, + 50516 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.853, + "words": [ + { + "text": "I'm", + "start": 141.76, + "end": 141.98, + "confidence": 0.976 + }, + { + "text": "staying", + "start": 141.98, + "end": 142.16, + "confidence": 0.708 + }, + { + "text": "where", + "start": 142.16, + "end": 142.34, + "confidence": 0.906 + }, + { + "text": "nobody's", + "start": 142.34, + "end": 142.68, + "confidence": 0.655 + }, + { + "text": "supposed", + "start": 142.68, + "end": 142.94, + "confidence": 0.923 + }, + { + "text": "to", + "start": 142.94, + "end": 143.1, + "confidence": 0.992 + }, + { + "text": "be.", + "start": 143.1, + "end": 143.2, + "confidence": 0.997 + } + ] + }, + { + "id": 18, + "seek": 14012, + "start": 143.24, + "end": 144.98, + "text": " I propose to be in a wreck of emotions.", + "tokens": [ + 50516, + 286, + 17421, + 281, + 312, + 294, + 257, + 21478, + 295, + 8462, + 13, + 50610 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + 
"no_speech_prob": 0.7754144668579102, + "confidence": 0.646, + "words": [ + { + "text": "I", + "start": 143.24, + "end": 143.42, + "confidence": 0.492 + }, + { + "text": "propose", + "start": 143.42, + "end": 143.72, + "confidence": 0.333 + }, + { + "text": "to", + "start": 143.72, + "end": 143.88, + "confidence": 0.778 + }, + { + "text": "be", + "start": 143.88, + "end": 144.02, + "confidence": 0.442 + }, + { + "text": "in", + "start": 144.02, + "end": 144.18, + "confidence": 0.812 + }, + { + "text": "a", + "start": 144.18, + "end": 144.28, + "confidence": 0.559 + }, + { + "text": "wreck", + "start": 144.28, + "end": 144.4, + "confidence": 0.811 + }, + { + "text": "of", + "start": 144.4, + "end": 144.54, + "confidence": 0.99 + }, + { + "text": "emotions.", + "start": 144.54, + "end": 144.98, + "confidence": 0.953 + } + ] + }, + { + "id": 19, + "seek": 14012, + "start": 145.06, + "end": 146.3, + "text": " Ready to go whenever you let me know.", + "tokens": [ + 50610, + 9944, + 281, + 352, + 5699, + 291, + 718, + 385, + 458, + 13, + 50672 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.834, + "words": [ + { + "text": "Ready", + "start": 145.06, + "end": 145.3, + "confidence": 0.951 + }, + { + "text": "to", + "start": 145.3, + "end": 145.46, + "confidence": 0.997 + }, + { + "text": "go", + "start": 145.46, + "end": 145.6, + "confidence": 0.997 + }, + { + "text": "whenever", + "start": 145.6, + "end": 145.78, + "confidence": 0.661 + }, + { + "text": "you", + "start": 145.78, + "end": 145.94, + "confidence": 0.403 + }, + { + "text": "let", + "start": 145.94, + "end": 146.08, + "confidence": 0.952 + }, + { + "text": "me", + "start": 146.08, + "end": 146.18, + "confidence": 0.998 + }, + { + "text": "know.", + "start": 146.18, + "end": 146.3, + "confidence": 0.977 + } + ] + }, + { + "id": 20, + "seek": 14012, + "start": 146.36, + "end": 147.88, + "text": 
" The road is long, so put the pedal into the flow.", + "tokens": [ + 50672, + 440, + 3060, + 307, + 938, + 11, + 370, + 829, + 264, + 19122, + 666, + 264, + 3095, + 13, + 50749 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.959, + "words": [ + { + "text": "The", + "start": 146.36, + "end": 146.52, + "confidence": 0.99 + }, + { + "text": "road", + "start": 146.52, + "end": 146.68, + "confidence": 0.99 + }, + { + "text": "is", + "start": 146.68, + "end": 146.78, + "confidence": 0.987 + }, + { + "text": "long,", + "start": 146.78, + "end": 146.9, + "confidence": 0.961 + }, + { + "text": "so", + "start": 146.92, + "end": 147.02, + "confidence": 0.968 + }, + { + "text": "put", + "start": 147.02, + "end": 147.16, + "confidence": 0.954 + }, + { + "text": "the", + "start": 147.16, + "end": 147.28, + "confidence": 0.972 + }, + { + "text": "pedal", + "start": 147.28, + "end": 147.4, + "confidence": 0.993 + }, + { + "text": "into", + "start": 147.4, + "end": 147.6, + "confidence": 0.9 + }, + { + "text": "the", + "start": 147.6, + "end": 147.76, + "confidence": 0.992 + }, + { + "text": "flow.", + "start": 147.76, + "end": 147.88, + "confidence": 0.856 + } + ] + }, + { + "id": 21, + "seek": 14012, + "start": 147.94, + "end": 149.78, + "text": " The enemy on my trail, my energy unavailable.", + "tokens": [ + 50749, + 440, + 5945, + 322, + 452, + 9924, + 11, + 452, + 2281, + 36541, + 32699, + 13, + 50856 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.948, + "words": [ + { + "text": "The", + "start": 147.94, + "end": 148.08, + "confidence": 0.968 + }, + { + "text": "enemy", + "start": 148.08, + "end": 148.26, + "confidence": 0.726 + }, + { + "text": "on", + "start": 148.26, + "end": 148.38, + "confidence": 0.974 + }, + { + "text": "my", + 
"start": 148.38, + "end": 148.5, + "confidence": 0.974 + }, + { + "text": "trail,", + "start": 148.5, + "end": 148.64, + "confidence": 0.986 + }, + { + "text": "my", + "start": 148.68, + "end": 148.86, + "confidence": 0.99 + }, + { + "text": "energy", + "start": 148.86, + "end": 149.1, + "confidence": 0.996 + }, + { + "text": "unavailable.", + "start": 149.1, + "end": 149.78, + "confidence": 0.978 + } + ] + }, + { + "id": 22, + "seek": 14012, + "start": 150.0, + "end": 151.35, + "text": " I'ma tell them I said away, go away.", + "tokens": [ + 50856, + 286, + 478, + 64, + 980, + 552, + 286, + 848, + 1314, + 11, + 352, + 1314, + 13, + 50924 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.571, + "words": [ + { + "text": "I'ma", + "start": 150.0, + "end": 150.26, + "confidence": 0.765 + }, + { + "text": "tell", + "start": 150.26, + "end": 150.36, + "confidence": 0.989 + }, + { + "text": "them", + "start": 150.36, + "end": 150.5, + "confidence": 0.335 + }, + { + "text": "I", + "start": 150.5, + "end": 150.62, + "confidence": 0.359 + }, + { + "text": "said", + "start": 150.62, + "end": 150.76, + "confidence": 0.3 + }, + { + "text": "away,", + "start": 150.76, + "end": 151.0, + "confidence": 0.325 + }, + { + "text": "go", + "start": 151.14, + "end": 151.18, + "confidence": 0.911 + }, + { + "text": "away.", + "start": 151.18, + "end": 151.35, + "confidence": 0.781 + } + ] + }, + { + "id": 23, + "seek": 14012, + "start": 151.35, + "end": 152.55, + "text": " When I'm plotting, I'ma drive to the top.", + "tokens": [ + 50924, + 1133, + 286, + 478, + 41178, + 11, + 286, + 478, + 64, + 3332, + 281, + 264, + 1192, + 13, + 50985 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.644, + "words": [ + { + "text": "When", + "start": 151.35, + "end": 
151.56, + "confidence": 0.886 + }, + { + "text": "I'm", + "start": 151.56, + "end": 151.72, + "confidence": 0.592 + }, + { + "text": "plotting,", + "start": 151.72, + "end": 151.82, + "confidence": 0.365 + }, + { + "text": "I'ma", + "start": 151.84, + "end": 152.0, + "confidence": 0.487 + }, + { + "text": "drive", + "start": 152.0, + "end": 152.16, + "confidence": 0.622 + }, + { + "text": "to", + "start": 152.16, + "end": 152.3, + "confidence": 0.984 + }, + { + "text": "the", + "start": 152.3, + "end": 152.42, + "confidence": 0.997 + }, + { + "text": "top.", + "start": 152.42, + "end": 152.55, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 14012, + "start": 152.55, + "end": 153.74, + "text": " I've been out of shape, thinking out of the box.", + "tokens": [ + 50985, + 286, + 600, + 668, + 484, + 295, + 3909, + 11, + 1953, + 484, + 295, + 264, + 2424, + 13, + 51043 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.769, + "words": [ + { + "text": "I've", + "start": 152.55, + "end": 152.7, + "confidence": 0.723 + }, + { + "text": "been", + "start": 152.7, + "end": 152.78, + "confidence": 0.998 + }, + { + "text": "out", + "start": 152.78, + "end": 152.9, + "confidence": 0.905 + }, + { + "text": "of", + "start": 152.9, + "end": 153.02, + "confidence": 0.986 + }, + { + "text": "shape,", + "start": 153.02, + "end": 153.1, + "confidence": 0.998 + }, + { + "text": "thinking", + "start": 153.26, + "end": 153.32, + "confidence": 0.842 + }, + { + "text": "out", + "start": 153.32, + "end": 153.46, + "confidence": 0.343 + }, + { + "text": "of", + "start": 153.46, + "end": 153.56, + "confidence": 0.505 + }, + { + "text": "the", + "start": 153.56, + "end": 153.62, + "confidence": 0.82 + }, + { + "text": "box.", + "start": 153.62, + "end": 153.74, + "confidence": 0.998 + } + ] + }, + { + "id": 25, + "seek": 14012, + "start": 153.74, + "end": 155.34, + 
"text": " I'm an astronaut, blasted off the planet.", + "tokens": [ + 51043, + 286, + 478, + 364, + 18516, + 11, + 12035, + 292, + 766, + 264, + 5054, + 13, + 51124 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.967, + "words": [ + { + "text": "I'm", + "start": 153.74, + "end": 153.92, + "confidence": 0.996 + }, + { + "text": "an", + "start": 153.92, + "end": 154.06, + "confidence": 0.996 + }, + { + "text": "astronaut,", + "start": 154.06, + "end": 154.4, + "confidence": 0.991 + }, + { + "text": "blasted", + "start": 154.54, + "end": 154.88, + "confidence": 0.895 + }, + { + "text": "off", + "start": 154.88, + "end": 155.02, + "confidence": 0.987 + }, + { + "text": "the", + "start": 155.02, + "end": 155.18, + "confidence": 0.959 + }, + { + "text": "planet.", + "start": 155.18, + "end": 155.34, + "confidence": 0.997 + } + ] + }, + { + "id": 26, + "seek": 14012, + "start": 155.4, + "end": 157.18, + "text": " Rock the cars, catastrophic, and it matters more", + "tokens": [ + 51124, + 6922, + 264, + 5163, + 11, + 34915, + 11, + 293, + 309, + 7001, + 544, + 51212 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.703, + "words": [ + { + "text": "Rock", + "start": 155.4, + "end": 155.6, + "confidence": 0.564 + }, + { + "text": "the", + "start": 155.6, + "end": 155.74, + "confidence": 0.505 + }, + { + "text": "cars,", + "start": 155.74, + "end": 155.88, + "confidence": 0.47 + }, + { + "text": "catastrophic,", + "start": 155.9, + "end": 156.4, + "confidence": 0.771 + }, + { + "text": "and", + "start": 156.46, + "end": 156.62, + "confidence": 0.925 + }, + { + "text": "it", + "start": 156.62, + "end": 156.76, + "confidence": 0.733 + }, + { + "text": "matters", + "start": 156.76, + "end": 156.94, + "confidence": 0.878 + }, + { + "text": "more", 
+ "start": 156.94, + "end": 157.18, + "confidence": 0.974 + } + ] + }, + { + "id": 27, + "seek": 14012, + "start": 157.18, + "end": 157.91, + "text": " because I had it now.", + "tokens": [ + 51212, + 570, + 286, + 632, + 309, + 586, + 13, + 51254 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.649, + "words": [ + { + "text": "because", + "start": 157.18, + "end": 157.42, + "confidence": 0.99 + }, + { + "text": "I", + "start": 157.42, + "end": 157.56, + "confidence": 0.532 + }, + { + "text": "had", + "start": 157.56, + "end": 157.66, + "confidence": 0.936 + }, + { + "text": "it", + "start": 157.66, + "end": 157.82, + "confidence": 0.984 + }, + { + "text": "now.", + "start": 157.82, + "end": 157.91, + "confidence": 0.237 + } + ] + }, + { + "id": 28, + "seek": 14012, + "start": 157.91, + "end": 159.84, + "text": " Had I thought about wreaking havoc on an opposition.", + "tokens": [ + 51254, + 12298, + 286, + 1194, + 466, + 46674, + 2456, + 47367, + 322, + 364, + 13504, + 13, + 51350 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.954, + "words": [ + { + "text": "Had", + "start": 157.91, + "end": 158.12, + "confidence": 0.843 + }, + { + "text": "I", + "start": 158.12, + "end": 158.2, + "confidence": 0.971 + }, + { + "text": "thought", + "start": 158.2, + "end": 158.38, + "confidence": 0.991 + }, + { + "text": "about", + "start": 158.38, + "end": 158.58, + "confidence": 0.995 + }, + { + "text": "wreaking", + "start": 158.58, + "end": 158.86, + "confidence": 0.996 + }, + { + "text": "havoc", + "start": 158.86, + "end": 159.08, + "confidence": 1.0 + }, + { + "text": "on", + "start": 159.08, + "end": 159.32, + "confidence": 0.862 + }, + { + "text": "an", + "start": 159.32, + "end": 159.46, + "confidence": 0.913 + }, + { + "text": 
"opposition.", + "start": 159.46, + "end": 159.84, + "confidence": 0.991 + } + ] + }, + { + "id": 29, + "seek": 14012, + "start": 159.86, + "end": 161.02, + "text": " Kind of shocking, they want it static.", + "tokens": [ + 51350, + 9242, + 295, + 18776, + 11, + 436, + 528, + 309, + 13437, + 13, + 51410 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.667, + "words": [ + { + "text": "Kind", + "start": 159.86, + "end": 160.06, + "confidence": 0.661 + }, + { + "text": "of", + "start": 160.06, + "end": 160.18, + "confidence": 0.995 + }, + { + "text": "shocking,", + "start": 160.18, + "end": 160.38, + "confidence": 0.656 + }, + { + "text": "they", + "start": 160.52, + "end": 160.6, + "confidence": 0.366 + }, + { + "text": "want", + "start": 160.6, + "end": 160.74, + "confidence": 0.574 + }, + { + "text": "it", + "start": 160.74, + "end": 160.88, + "confidence": 0.651 + }, + { + "text": "static.", + "start": 160.88, + "end": 161.02, + "confidence": 0.993 + } + ] + }, + { + "id": 30, + "seek": 14012, + "start": 161.08, + "end": 162.18, + "text": " With precision, I'm automatic.", + "tokens": [ + 51410, + 2022, + 18356, + 11, + 286, + 478, + 12509, + 13, + 51468 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.874, + "words": [ + { + "text": "With", + "start": 161.08, + "end": 161.28, + "confidence": 0.587 + }, + { + "text": "precision,", + "start": 161.28, + "end": 161.58, + "confidence": 0.904 + }, + { + "text": "I'm", + "start": 161.68, + "end": 161.88, + "confidence": 0.987 + }, + { + "text": "automatic.", + "start": 161.88, + "end": 162.18, + "confidence": 0.987 + } + ] + }, + { + "id": 31, + "seek": 14012, + "start": 162.24, + "end": 163.34, + "text": " Quarterback, I ain't talking second.", + "tokens": [ + 51468, + 43794, + 3207, + 
11, + 286, + 7862, + 380, + 1417, + 1150, + 13, + 51527 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.687, + "words": [ + { + "text": "Quarterback,", + "start": 162.24, + "end": 162.66, + "confidence": 0.511 + }, + { + "text": "I", + "start": 162.66, + "end": 162.78, + "confidence": 0.964 + }, + { + "text": "ain't", + "start": 162.78, + "end": 162.9, + "confidence": 0.99 + }, + { + "text": "talking", + "start": 162.9, + "end": 163.1, + "confidence": 0.891 + }, + { + "text": "second.", + "start": 163.1, + "end": 163.34, + "confidence": 0.327 + } + ] + }, + { + "id": 32, + "seek": 14012, + "start": 163.36, + "end": 164.12, + "text": " Pack it, pack it up.", + "tokens": [ + 51527, + 18466, + 309, + 11, + 2844, + 309, + 493, + 13, + 51562 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.973, + "words": [ + { + "text": "Pack", + "start": 163.36, + "end": 163.62, + "confidence": 0.967 + }, + { + "text": "it,", + "start": 163.62, + "end": 163.68, + "confidence": 0.937 + }, + { + "text": "pack", + "start": 163.68, + "end": 163.88, + "confidence": 0.986 + }, + { + "text": "it", + "start": 163.88, + "end": 164.0, + "confidence": 0.999 + }, + { + "text": "up.", + "start": 164.0, + "end": 164.12, + "confidence": 0.979 + } + ] + }, + { + "id": 33, + "seek": 14012, + "start": 164.12, + "end": 165.04, + "text": " I don't panic, better, better.", + "tokens": [ + 51562, + 286, + 500, + 380, + 14783, + 11, + 1101, + 11, + 1101, + 13, + 51612 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.76, + "words": [ + { + "text": "I", + "start": 164.12, + "end": 164.18, + "confidence": 0.984 + }, + { + "text": "don't", + "start": 164.18, + 
"end": 164.34, + "confidence": 0.912 + }, + { + "text": "panic,", + "start": 164.34, + "end": 164.58, + "confidence": 0.999 + }, + { + "text": "better,", + "start": 164.6, + "end": 164.86, + "confidence": 0.424 + }, + { + "text": "better.", + "start": 164.86, + "end": 165.04, + "confidence": 0.555 + } + ] + }, + { + "id": 34, + "seek": 14012, + "start": 165.14, + "end": 165.74, + "text": " Up who the baddest.", + "tokens": [ + 51612, + 5858, + 567, + 264, + 1578, + 23748, + 13, + 51644 + ], + "temperature": 0.0, + "avg_logprob": -0.36420179578993056, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7754144668579102, + "confidence": 0.75, + "words": [ + { + "text": "Up", + "start": 165.14, + "end": 165.28, + "confidence": 0.517 + }, + { + "text": "who", + "start": 165.28, + "end": 165.4, + "confidence": 0.747 + }, + { + "text": "the", + "start": 165.4, + "end": 165.54, + "confidence": 0.926 + }, + { + "text": "baddest.", + "start": 165.54, + "end": 165.74, + "confidence": 0.816 + } + ] + }, + { + "id": 35, + "seek": 16572, + "start": 165.74, + "end": 166.84, + "text": " it don't matter cause we is your enemy.", + "tokens": [ + 50364, + 309, + 500, + 380, + 1871, + 3082, + 321, + 307, + 428, + 5945, + 13, + 50414 + ], + "temperature": 0.0, + "avg_logprob": -0.9108315981351413, + "compression_ratio": 0.8297872340425532, + "no_speech_prob": 0.6954025626182556, + "confidence": 0.447, + "words": [ + { + "text": "it", + "start": 165.74, + "end": 165.88, + "confidence": 0.125 + }, + { + "text": "don't", + "start": 165.88, + "end": 166.06, + "confidence": 0.825 + }, + { + "text": "matter", + "start": 166.06, + "end": 166.22, + "confidence": 0.996 + }, + { + "text": "cause", + "start": 166.22, + "end": 166.4, + "confidence": 0.287 + }, + { + "text": "we", + "start": 166.4, + "end": 166.56, + "confidence": 0.889 + }, + { + "text": "is", + "start": 166.56, + "end": 166.66, + "confidence": 0.229 + }, + { + "text": "your", + "start": 166.66, + "end": 166.76, + 
"confidence": 0.561 + }, + { + "text": "enemy.", + "start": 166.76, + "end": 166.84, + "confidence": 0.257 + } + ] + }, + { + "id": 36, + "seek": 19572, + "start": 195.72, + "end": 198.2, + "text": " I swear I'll never be insane", + "tokens": [ + 50364, + 286, + 11902, + 286, + 603, + 1128, + 312, + 10838, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.5534874200820923, + "compression_ratio": 1.0, + "no_speech_prob": 0.6747308969497681, + "confidence": 0.773, + "words": [ + { + "text": "I", + "start": 195.72, + "end": 196.38, + "confidence": 0.745 + }, + { + "text": "swear", + "start": 196.38, + "end": 196.74, + "confidence": 0.963 + }, + { + "text": "I'll", + "start": 196.74, + "end": 197.18, + "confidence": 0.665 + }, + { + "text": "never", + "start": 197.18, + "end": 197.58, + "confidence": 0.997 + }, + { + "text": "be", + "start": 197.58, + "end": 197.92, + "confidence": 0.994 + }, + { + "text": "insane", + "start": 197.92, + "end": 198.2, + "confidence": 0.525 + } + ] + }, + { + "id": 37, + "seek": 19572, + "start": 198.5, + "end": 199.38, + "text": " You gotta be insane", + "tokens": [ + 50464, + 509, + 3428, + 312, + 10838, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.5534874200820923, + "compression_ratio": 1.0, + "no_speech_prob": 0.6747308969497681, + "confidence": 0.492, + "words": [ + { + "text": "You", + "start": 198.5, + "end": 198.7, + "confidence": 0.4 + }, + { + "text": "gotta", + "start": 198.7, + "end": 198.88, + "confidence": 0.268 + }, + { + "text": "be", + "start": 198.88, + "end": 199.12, + "confidence": 0.959 + }, + { + "text": "insane", + "start": 199.12, + "end": 199.38, + "confidence": 0.568 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/random.nocond_apollo11.mp3.words.json b/tests/expected/corner_cases/random.nocond_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..d4814a97549c0758cade1cb8eb26d2b28b68f751 --- 
/dev/null +++ b/tests/expected/corner_cases/random.nocond_apollo11.mp3.words.json @@ -0,0 +1,1970 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like the... Yeah, I'll put that there, my friend. They make the one that's on the helmet we were going to have in B1. And you can put the other one on the mic helmet with those GVA flipper strings. I got them. I got them. They're the better now. They're the one that got the one right there. They're going to the three three states. We got them in there. I got them in there. And I got them in there. I got them in there. I got them in there. I got them in there. I got them in there. Yeah, I'm thinking next week on the field. Hey, we were... You wanna hang me on the ground with the cover, I tried it already. Okay, fine, we weren't sure of that, just a suggestion. We thought we'd... You could check it out. I'm not sure if you've already done that. So, I guess we're gonna come up with it, let us know. Okay, no problem. Okay, no problem. We'll let you know when the end of the... ...sun. 
Bye.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.52, + "end": 6.54, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13, + 50714 + ], + "temperature": 0.1, + "avg_logprob": -0.7575161404079861, + "compression_ratio": 1.4202898550724639, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.541, + "words": [ + { + "text": "Apollo", + "start": 0.52, + "end": 0.88, + "confidence": 0.156 + }, + { + "text": "11,", + "start": 0.88, + "end": 1.26, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.72, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.72, + "end": 1.94, + "confidence": 0.518 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.824 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.26, + "end": 2.86, + "confidence": 0.968 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.947 + }, + { + "text": "you", + "start": 3.44, + "end": 3.6, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.6, + "end": 3.72, + "confidence": 0.909 + }, + { + "text": "your", + "start": 3.72, + "end": 3.92, + "confidence": 0.972 + }, + { + "text": "Soyuz-VA", + "start": 3.92, + "end": 5.16, + "confidence": 0.26 + }, + { + "text": "GLEME", + "start": 5.16, + "end": 5.74, + "confidence": 0.474 + }, + { + "text": "GVA.", + "start": 5.74, + "end": 6.54, + "confidence": 0.435 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 13.48, + "text": " Alright, okay, we like the...", + "tokens": [ + 50714, + 2798, + 11, + 1392, + 11, + 321, + 411, + 264, + 485, + 51014 + ], + "temperature": 0.1, + "avg_logprob": -0.7575161404079861, + "compression_ratio": 
1.4202898550724639, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.412, + "words": [ + { + "text": "Alright,", + "start": 10.8, + "end": 11.04, + "confidence": 0.308 + }, + { + "text": "okay,", + "start": 11.68, + "end": 12.22, + "confidence": 0.507 + }, + { + "text": "we", + "start": 12.5, + "end": 12.98, + "confidence": 0.609 + }, + { + "text": "like", + "start": 12.98, + "end": 13.26, + "confidence": 0.501 + }, + { + "text": "the...", + "start": 13.26, + "end": 13.48, + "confidence": 0.249 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 13.48, + "end": 14.6, + "text": " Yeah, I'll put that there, my friend.", + "tokens": [ + 51014, + 865, + 11, + 286, + 603, + 829, + 300, + 456, + 11, + 452, + 1277, + 13, + 51114 + ], + "temperature": 0.1, + "avg_logprob": -0.7575161404079861, + "compression_ratio": 1.4202898550724639, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.218, + "words": [ + { + "text": "Yeah,", + "start": 13.48, + "end": 13.5, + "confidence": 0.28 + }, + { + "text": "I'll", + "start": 13.5, + "end": 13.78, + "confidence": 0.196 + }, + { + "text": "put", + "start": 13.78, + "end": 13.96, + "confidence": 0.207 + }, + { + "text": "that", + "start": 13.96, + "end": 14.12, + "confidence": 0.86 + }, + { + "text": "there,", + "start": 14.12, + "end": 14.38, + "confidence": 0.325 + }, + { + "text": "my", + "start": 14.44, + "end": 14.54, + "confidence": 0.095 + }, + { + "text": "friend.", + "start": 14.54, + "end": 14.6, + "confidence": 0.088 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 14.6, + "end": 19.08, + "text": " They make the one that's on the helmet we were going to have in B1.", + "tokens": [ + 51114, + 814, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 645, + 516, + 281, + 362, + 294, + 363, + 16, + 13, + 51314 + ], + "temperature": 0.1, + "avg_logprob": -0.7575161404079861, + "compression_ratio": 1.4202898550724639, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.6, + "words": [ + 
{ + "text": "They", + "start": 14.6, + "end": 15.46, + "confidence": 0.367 + }, + { + "text": "make", + "start": 15.46, + "end": 15.68, + "confidence": 0.457 + }, + { + "text": "the", + "start": 15.68, + "end": 15.84, + "confidence": 0.358 + }, + { + "text": "one", + "start": 15.84, + "end": 16.06, + "confidence": 0.71 + }, + { + "text": "that's", + "start": 16.06, + "end": 16.28, + "confidence": 0.572 + }, + { + "text": "on", + "start": 16.28, + "end": 16.48, + "confidence": 0.635 + }, + { + "text": "the", + "start": 16.48, + "end": 16.78, + "confidence": 0.856 + }, + { + "text": "helmet", + "start": 16.78, + "end": 17.26, + "confidence": 0.893 + }, + { + "text": "we", + "start": 17.26, + "end": 17.52, + "confidence": 0.185 + }, + { + "text": "were", + "start": 17.52, + "end": 17.78, + "confidence": 0.529 + }, + { + "text": "going", + "start": 17.78, + "end": 17.92, + "confidence": 0.549 + }, + { + "text": "to", + "start": 17.92, + "end": 17.98, + "confidence": 0.983 + }, + { + "text": "have", + "start": 17.98, + "end": 18.2, + "confidence": 0.959 + }, + { + "text": "in", + "start": 18.2, + "end": 18.38, + "confidence": 0.843 + }, + { + "text": "B1.", + "start": 18.38, + "end": 19.08, + "confidence": 0.764 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 20.12, + "end": 24.52, + "text": " And you can put the other one on the mic helmet with those GVA flipper strings.", + "tokens": [ + 51314, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 932, + 15124, + 13985, + 13, + 51614 + ], + "temperature": 0.1, + "avg_logprob": -0.7575161404079861, + "compression_ratio": 1.4202898550724639, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.468, + "words": [ + { + "text": "And", + "start": 20.12, + "end": 20.16, + "confidence": 0.533 + }, + { + "text": "you", + "start": 20.16, + "end": 20.32, + "confidence": 0.942 + }, + { + "text": "can", + "start": 20.32, + "end": 20.48, + "confidence": 0.74 + }, + 
{ + "text": "put", + "start": 20.48, + "end": 20.64, + "confidence": 0.992 + }, + { + "text": "the", + "start": 20.64, + "end": 20.84, + "confidence": 0.99 + }, + { + "text": "other", + "start": 20.84, + "end": 21.0, + "confidence": 0.993 + }, + { + "text": "one", + "start": 21.0, + "end": 21.18, + "confidence": 0.982 + }, + { + "text": "on", + "start": 21.18, + "end": 21.42, + "confidence": 0.989 + }, + { + "text": "the", + "start": 21.42, + "end": 21.94, + "confidence": 0.461 + }, + { + "text": "mic", + "start": 21.94, + "end": 22.48, + "confidence": 0.385 + }, + { + "text": "helmet", + "start": 22.48, + "end": 22.8, + "confidence": 0.89 + }, + { + "text": "with", + "start": 22.8, + "end": 23.06, + "confidence": 0.432 + }, + { + "text": "those", + "start": 23.06, + "end": 23.3, + "confidence": 0.45 + }, + { + "text": "GVA", + "start": 23.3, + "end": 23.74, + "confidence": 0.224 + }, + { + "text": "flipper", + "start": 23.74, + "end": 24.18, + "confidence": 0.102 + }, + { + "text": "strings.", + "start": 24.18, + "end": 24.52, + "confidence": 0.206 + } + ] + }, + { + "id": 5, + "seek": 2500, + "start": 31.32, + "end": 34.03, + "text": " I got them. I got them. 
They're the better now.", + "tokens": [ + 50364, + 286, + 658, + 552, + 13, + 286, + 658, + 552, + 13, + 814, + 434, + 264, + 1101, + 586, + 13, + 50814 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + "compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.45, + "words": [ + { + "text": "I", + "start": 31.32, + "end": 31.52, + "confidence": 0.122 + }, + { + "text": "got", + "start": 31.52, + "end": 31.82, + "confidence": 0.464 + }, + { + "text": "them.", + "start": 31.82, + "end": 32.12, + "confidence": 0.263 + }, + { + "text": "I", + "start": 32.44, + "end": 32.84, + "confidence": 0.386 + }, + { + "text": "got", + "start": 32.84, + "end": 33.16, + "confidence": 0.967 + }, + { + "text": "them.", + "start": 33.16, + "end": 33.44, + "confidence": 0.957 + }, + { + "text": "They're", + "start": 33.44, + "end": 33.5, + "confidence": 0.389 + }, + { + "text": "the", + "start": 33.5, + "end": 33.56, + "confidence": 0.311 + }, + { + "text": "better", + "start": 33.56, + "end": 33.76, + "confidence": 0.761 + }, + { + "text": "now.", + "start": 33.76, + "end": 34.03, + "confidence": 0.795 + } + ] + }, + { + "id": 6, + "seek": 2500, + "start": 34.03, + "end": 36.36, + "text": " They're the one that got the one right there.", + "tokens": [ + 50814, + 814, + 434, + 264, + 472, + 300, + 658, + 264, + 472, + 558, + 456, + 13, + 50964 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + "compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.276, + "words": [ + { + "text": "They're", + "start": 34.03, + "end": 34.38, + "confidence": 0.243 + }, + { + "text": "the", + "start": 34.38, + "end": 34.58, + "confidence": 0.735 + }, + { + "text": "one", + "start": 34.58, + "end": 34.94, + "confidence": 0.279 + }, + { + "text": "that", + "start": 34.94, + "end": 35.28, + "confidence": 0.459 + }, + { + "text": "got", + "start": 35.28, + "end": 35.48, + "confidence": 0.188 + }, 
+ { + "text": "the", + "start": 35.48, + "end": 35.68, + "confidence": 0.441 + }, + { + "text": "one", + "start": 35.68, + "end": 35.76, + "confidence": 0.154 + }, + { + "text": "right", + "start": 35.76, + "end": 36.04, + "confidence": 0.08 + }, + { + "text": "there.", + "start": 36.04, + "end": 36.36, + "confidence": 0.454 + } + ] + }, + { + "id": 7, + "seek": 2500, + "start": 37.62, + "end": 39.22, + "text": " They're going to the three three states.", + "tokens": [ + 50964, + 814, + 434, + 516, + 281, + 264, + 1045, + 1045, + 4368, + 13, + 51114 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + "compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.239, + "words": [ + { + "text": "They're", + "start": 37.62, + "end": 37.96, + "confidence": 0.506 + }, + { + "text": "going", + "start": 37.96, + "end": 38.12, + "confidence": 0.908 + }, + { + "text": "to", + "start": 38.12, + "end": 38.36, + "confidence": 0.286 + }, + { + "text": "the", + "start": 38.36, + "end": 38.48, + "confidence": 0.145 + }, + { + "text": "three", + "start": 38.48, + "end": 38.74, + "confidence": 0.065 + }, + { + "text": "three", + "start": 38.74, + "end": 38.96, + "confidence": 0.177 + }, + { + "text": "states.", + "start": 38.96, + "end": 39.22, + "confidence": 0.096 + } + ] + }, + { + "id": 8, + "seek": 2500, + "start": 39.88, + "end": 40.98, + "text": " We got them in there.", + "tokens": [ + 51114, + 492, + 658, + 552, + 294, + 456, + 13, + 51164 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + "compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.726, + "words": [ + { + "text": "We", + "start": 39.88, + "end": 40.2, + "confidence": 0.618 + }, + { + "text": "got", + "start": 40.2, + "end": 40.48, + "confidence": 0.596 + }, + { + "text": "them", + "start": 40.48, + "end": 40.64, + "confidence": 0.957 + }, + { + "text": "in", + "start": 40.64, + "end": 40.74, + 
"confidence": 0.906 + }, + { + "text": "there.", + "start": 40.74, + "end": 40.98, + "confidence": 0.632 + } + ] + }, + { + "id": 9, + "seek": 2500, + "start": 41.24, + "end": 42.0, + "text": " I got them in there.", + "tokens": [ + 51164, + 286, + 658, + 552, + 294, + 456, + 13, + 51264 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + "compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.367, + "words": [ + { + "text": "I", + "start": 41.24, + "end": 41.32, + "confidence": 0.228 + }, + { + "text": "got", + "start": 41.32, + "end": 41.5, + "confidence": 0.182 + }, + { + "text": "them", + "start": 41.5, + "end": 41.56, + "confidence": 0.846 + }, + { + "text": "in", + "start": 41.56, + "end": 41.72, + "confidence": 0.889 + }, + { + "text": "there.", + "start": 41.72, + "end": 42.0, + "confidence": 0.213 + } + ] + }, + { + "id": 10, + "seek": 2500, + "start": 43.08, + "end": 44.68, + "text": " And I got them in there.", + "tokens": [ + 51264, + 400, + 286, + 658, + 552, + 294, + 456, + 13, + 51364 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + "compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.404, + "words": [ + { + "text": "And", + "start": 43.08, + "end": 43.32, + "confidence": 0.348 + }, + { + "text": "I", + "start": 43.32, + "end": 44.0, + "confidence": 0.309 + }, + { + "text": "got", + "start": 44.0, + "end": 44.18, + "confidence": 0.209 + }, + { + "text": "them", + "start": 44.18, + "end": 44.5, + "confidence": 0.734 + }, + { + "text": "in", + "start": 44.5, + "end": 44.54, + "confidence": 0.717 + }, + { + "text": "there.", + "start": 44.54, + "end": 44.68, + "confidence": 0.37 + } + ] + }, + { + "id": 11, + "seek": 2500, + "start": 46.22, + "end": 47.02, + "text": " I got them in there.", + "tokens": [ + 51364, + 286, + 658, + 552, + 294, + 456, + 13, + 51464 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + 
"compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.557, + "words": [ + { + "text": "I", + "start": 46.22, + "end": 46.36, + "confidence": 0.214 + }, + { + "text": "got", + "start": 46.36, + "end": 46.44, + "confidence": 0.459 + }, + { + "text": "them", + "start": 46.44, + "end": 46.46, + "confidence": 0.939 + }, + { + "text": "in", + "start": 46.46, + "end": 46.72, + "confidence": 0.836 + }, + { + "text": "there.", + "start": 46.72, + "end": 47.02, + "confidence": 0.698 + } + ] + }, + { + "id": 12, + "seek": 2500, + "start": 48.22, + "end": 48.74, + "text": " I got them in there.", + "tokens": [ + 51464, + 286, + 658, + 552, + 294, + 456, + 13, + 51564 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + "compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.641, + "words": [ + { + "text": "I", + "start": 48.22, + "end": 48.28, + "confidence": 0.238 + }, + { + "text": "got", + "start": 48.28, + "end": 48.64, + "confidence": 0.692 + }, + { + "text": "them", + "start": 48.64, + "end": 48.7, + "confidence": 0.956 + }, + { + "text": "in", + "start": 48.7, + "end": 48.72, + "confidence": 0.861 + }, + { + "text": "there.", + "start": 48.72, + "end": 48.74, + "confidence": 0.801 + } + ] + }, + { + "id": 13, + "seek": 2500, + "start": 48.96, + "end": 49.52, + "text": " I got them in there.", + "tokens": [ + 51564, + 286, + 658, + 552, + 294, + 456, + 13, + 51664 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + "compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.712, + "words": [ + { + "text": "I", + "start": 48.96, + "end": 49.06, + "confidence": 0.317 + }, + { + "text": "got", + "start": 49.06, + "end": 49.2, + "confidence": 0.787 + }, + { + "text": "them", + "start": 49.2, + "end": 49.32, + "confidence": 0.958 + }, + { + "text": "in", + "start": 49.32, + "end": 49.38, + "confidence": 0.872 + }, + { + 
"text": "there.", + "start": 49.38, + "end": 49.52, + "confidence": 0.876 + } + ] + }, + { + "id": 14, + "seek": 2500, + "start": 51.44, + "end": 52.7, + "text": " I got them in there.", + "tokens": [ + 51664, + 286, + 658, + 552, + 294, + 456, + 13, + 51764 + ], + "temperature": 0.1, + "avg_logprob": -0.7153143834586095, + "compression_ratio": 2.723809523809524, + "no_speech_prob": 0.8235867023468018, + "confidence": 0.705, + "words": [ + { + "text": "I", + "start": 51.44, + "end": 51.76, + "confidence": 0.258 + }, + { + "text": "got", + "start": 51.76, + "end": 51.86, + "confidence": 0.877 + }, + { + "text": "them", + "start": 51.86, + "end": 52.16, + "confidence": 0.965 + }, + { + "text": "in", + "start": 52.16, + "end": 52.44, + "confidence": 0.85 + }, + { + "text": "there.", + "start": 52.44, + "end": 52.7, + "confidence": 0.935 + } + ] + }, + { + "id": 15, + "seek": 5300, + "start": 53.08, + "end": 54.7, + "text": " Yeah, I'm thinking next week on the field.", + "tokens": [ + 50364, + 865, + 11, + 286, + 478, + 1953, + 958, + 1243, + 322, + 264, + 2519, + 13, + 50489 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.325, + "words": [ + { + "text": "Yeah,", + "start": 53.08, + "end": 53.26, + "confidence": 0.657 + }, + { + "text": "I'm", + "start": 53.34, + "end": 53.4, + "confidence": 0.448 + }, + { + "text": "thinking", + "start": 53.4, + "end": 53.62, + "confidence": 0.851 + }, + { + "text": "next", + "start": 53.62, + "end": 53.86, + "confidence": 0.264 + }, + { + "text": "week", + "start": 53.86, + "end": 54.06, + "confidence": 0.305 + }, + { + "text": "on", + "start": 54.06, + "end": 54.22, + "confidence": 0.181 + }, + { + "text": "the", + "start": 54.22, + "end": 54.36, + "confidence": 0.202 + }, + { + "text": "field.", + "start": 54.36, + "end": 54.7, + "confidence": 0.124 + } + ] + }, + { + "id": 16, + "seek": 5300, + "start": 56.46, + 
"end": 56.9, + "text": " Hey, we were...", + "tokens": [ + 50531, + 1911, + 11, + 321, + 645, + 485, + 50556 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.569, + "words": [ + { + "text": "Hey,", + "start": 56.46, + "end": 56.6, + "confidence": 0.28 + }, + { + "text": "we", + "start": 56.66, + "end": 56.76, + "confidence": 0.979 + }, + { + "text": "were...", + "start": 56.76, + "end": 56.9, + "confidence": 0.672 + } + ] + }, + { + "id": 17, + "seek": 5300, + "start": 56.9, + "end": 61.8, + "text": " You wanna hang me on the ground with the cover, I tried it already.", + "tokens": [ + 50556, + 509, + 1948, + 3967, + 385, + 322, + 264, + 2727, + 365, + 264, + 2060, + 11, + 286, + 3031, + 309, + 1217, + 13, + 50816 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.538, + "words": [ + { + "text": "You", + "start": 56.9, + "end": 56.92, + "confidence": 0.224 + }, + { + "text": "wanna", + "start": 56.92, + "end": 57.12, + "confidence": 0.446 + }, + { + "text": "hang", + "start": 57.12, + "end": 57.38, + "confidence": 0.528 + }, + { + "text": "me", + "start": 57.38, + "end": 57.58, + "confidence": 0.874 + }, + { + "text": "on", + "start": 57.58, + "end": 57.74, + "confidence": 0.854 + }, + { + "text": "the", + "start": 57.74, + "end": 58.82, + "confidence": 0.409 + }, + { + "text": "ground", + "start": 58.82, + "end": 59.72, + "confidence": 0.332 + }, + { + "text": "with", + "start": 59.72, + "end": 60.16, + "confidence": 0.423 + }, + { + "text": "the", + "start": 60.16, + "end": 60.26, + "confidence": 0.399 + }, + { + "text": "cover,", + "start": 60.26, + "end": 61.04, + "confidence": 0.847 + }, + { + "text": "I", + "start": 61.16, + "end": 61.24, + "confidence": 0.634 + }, + { + "text": "tried", + "start": 61.24, + "end": 61.44, + 
"confidence": 0.58 + }, + { + "text": "it", + "start": 61.44, + "end": 61.62, + "confidence": 0.679 + }, + { + "text": "already.", + "start": 61.62, + "end": 61.8, + "confidence": 0.898 + } + ] + }, + { + "id": 18, + "seek": 5300, + "start": 62.5, + "end": 65.12, + "text": " Okay, fine, we weren't sure of that, just a suggestion.", + "tokens": [ + 50831, + 1033, + 11, + 2489, + 11, + 321, + 4999, + 380, + 988, + 295, + 300, + 11, + 445, + 257, + 16541, + 13, + 50974 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.852, + "words": [ + { + "text": "Okay,", + "start": 62.5, + "end": 62.78, + "confidence": 0.751 + }, + { + "text": "fine,", + "start": 62.88, + "end": 63.06, + "confidence": 0.973 + }, + { + "text": "we", + "start": 63.16, + "end": 63.3, + "confidence": 0.972 + }, + { + "text": "weren't", + "start": 63.3, + "end": 63.56, + "confidence": 0.994 + }, + { + "text": "sure", + "start": 63.56, + "end": 63.72, + "confidence": 0.965 + }, + { + "text": "of", + "start": 63.72, + "end": 63.86, + "confidence": 0.576 + }, + { + "text": "that,", + "start": 63.86, + "end": 63.98, + "confidence": 0.994 + }, + { + "text": "just", + "start": 64.1, + "end": 64.56, + "confidence": 0.559 + }, + { + "text": "a", + "start": 64.56, + "end": 64.74, + "confidence": 0.794 + }, + { + "text": "suggestion.", + "start": 64.74, + "end": 65.12, + "confidence": 0.999 + } + ] + }, + { + "id": 19, + "seek": 5300, + "start": 65.26, + "end": 66.02, + "text": " We thought we'd...", + "tokens": [ + 50974, + 492, + 1194, + 321, + 1116, + 485, + 51024 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.915, + "words": [ + { + "text": "We", + "start": 65.26, + "end": 65.42, + "confidence": 0.969 + }, + { + "text": "thought", + "start": 65.42, + "end": 65.54, + 
"confidence": 0.986 + }, + { + "text": "we'd...", + "start": 65.54, + "end": 66.02, + "confidence": 0.856 + } + ] + }, + { + "id": 20, + "seek": 5300, + "start": 66.8, + "end": 67.7, + "text": " You could check it out.", + "tokens": [ + 51049, + 509, + 727, + 1520, + 309, + 484, + 13, + 51109 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.69, + "words": [ + { + "text": "You", + "start": 66.8, + "end": 67.02, + "confidence": 0.256 + }, + { + "text": "could", + "start": 67.02, + "end": 67.18, + "confidence": 0.703 + }, + { + "text": "check", + "start": 67.18, + "end": 67.38, + "confidence": 0.874 + }, + { + "text": "it", + "start": 67.38, + "end": 67.54, + "confidence": 0.995 + }, + { + "text": "out.", + "start": 67.54, + "end": 67.7, + "confidence": 0.998 + } + ] + }, + { + "id": 21, + "seek": 5300, + "start": 68.22, + "end": 69.26, + "text": " I'm not sure if you've already done that.", + "tokens": [ + 51124, + 286, + 478, + 406, + 988, + 498, + 291, + 600, + 1217, + 1096, + 300, + 13, + 51181 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.454, + "words": [ + { + "text": "I'm", + "start": 68.22, + "end": 68.44, + "confidence": 0.319 + }, + { + "text": "not", + "start": 68.44, + "end": 68.5, + "confidence": 0.341 + }, + { + "text": "sure", + "start": 68.5, + "end": 68.64, + "confidence": 0.819 + }, + { + "text": "if", + "start": 68.64, + "end": 68.66, + "confidence": 0.422 + }, + { + "text": "you've", + "start": 68.66, + "end": 68.72, + "confidence": 0.404 + }, + { + "text": "already", + "start": 68.72, + "end": 68.86, + "confidence": 0.437 + }, + { + "text": "done", + "start": 68.86, + "end": 69.06, + "confidence": 0.439 + }, + { + "text": "that.", + "start": 69.06, + "end": 69.26, + "confidence": 0.994 + } + ] + }, + { + "id": 
22, + "seek": 5300, + "start": 69.44, + "end": 72.44, + "text": " So, I guess we're gonna come up with it, let us know.", + "tokens": [ + 51181, + 407, + 11, + 286, + 2041, + 321, + 434, + 799, + 808, + 493, + 365, + 309, + 11, + 718, + 505, + 458, + 13, + 51354 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.724, + "words": [ + { + "text": "So,", + "start": 69.44, + "end": 69.6, + "confidence": 0.973 + }, + { + "text": "I", + "start": 69.7, + "end": 69.9, + "confidence": 0.545 + }, + { + "text": "guess", + "start": 69.9, + "end": 70.62, + "confidence": 0.989 + }, + { + "text": "we're", + "start": 70.62, + "end": 71.08, + "confidence": 0.732 + }, + { + "text": "gonna", + "start": 71.08, + "end": 71.22, + "confidence": 0.684 + }, + { + "text": "come", + "start": 71.22, + "end": 71.4, + "confidence": 0.94 + }, + { + "text": "up", + "start": 71.4, + "end": 71.6, + "confidence": 0.637 + }, + { + "text": "with", + "start": 71.6, + "end": 71.76, + "confidence": 0.959 + }, + { + "text": "it,", + "start": 71.76, + "end": 71.9, + "confidence": 0.206 + }, + { + "text": "let", + "start": 72.04, + "end": 72.1, + "confidence": 0.665 + }, + { + "text": "us", + "start": 72.1, + "end": 72.24, + "confidence": 0.996 + }, + { + "text": "know.", + "start": 72.24, + "end": 72.44, + "confidence": 0.998 + } + ] + }, + { + "id": 23, + "seek": 5300, + "start": 74.24, + "end": 75.16, + "text": " Okay, no problem.", + "tokens": [ + 51426, + 1033, + 11, + 572, + 1154, + 13, + 51486 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.931, + "words": [ + { + "text": "Okay,", + "start": 74.24, + "end": 74.5, + "confidence": 0.89 + }, + { + "text": "no", + "start": 74.68, + "end": 74.82, + "confidence": 0.932 + }, + { + "text": "problem.", + "start": 74.82, + "end": 
75.16, + "confidence": 0.972 + } + ] + }, + { + "id": 24, + "seek": 5300, + "start": 75.54, + "end": 76.38, + "text": " Okay, no problem.", + "tokens": [ + 51486, + 1033, + 11, + 572, + 1154, + 13, + 51544 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.953, + "words": [ + { + "text": "Okay,", + "start": 75.54, + "end": 75.84, + "confidence": 0.944 + }, + { + "text": "no", + "start": 75.92, + "end": 76.16, + "confidence": 0.919 + }, + { + "text": "problem.", + "start": 76.16, + "end": 76.38, + "confidence": 0.997 + } + ] + }, + { + "id": 25, + "seek": 5300, + "start": 76.38, + "end": 77.72, + "text": " We'll let you know when the end of the...", + "tokens": [ + 51544, + 492, + 603, + 718, + 291, + 458, + 562, + 264, + 917, + 295, + 264, + 485, + 51606 + ], + "temperature": 0.1, + "avg_logprob": -0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.761, + "words": [ + { + "text": "We'll", + "start": 76.38, + "end": 76.8, + "confidence": 0.79 + }, + { + "text": "let", + "start": 76.8, + "end": 76.92, + "confidence": 0.955 + }, + { + "text": "you", + "start": 76.92, + "end": 77.0, + "confidence": 0.701 + }, + { + "text": "know", + "start": 77.0, + "end": 77.16, + "confidence": 0.997 + }, + { + "text": "when", + "start": 77.16, + "end": 77.32, + "confidence": 0.416 + }, + { + "text": "the", + "start": 77.32, + "end": 77.4, + "confidence": 0.875 + }, + { + "text": "end", + "start": 77.4, + "end": 77.52, + "confidence": 0.98 + }, + { + "text": "of", + "start": 77.52, + "end": 77.64, + "confidence": 0.978 + }, + { + "text": "the...", + "start": 77.64, + "end": 77.72, + "confidence": 0.449 + } + ] + }, + { + "id": 26, + "seek": 5300, + "start": 78.04, + "end": 78.24, + "text": " ...sun.", + "tokens": [ + 51611, + 1097, + 11314, + 13, + 51656 + ], + "temperature": 0.1, + "avg_logprob": 
-0.7027367549156075, + "compression_ratio": 1.6748971193415638, + "no_speech_prob": 0.43447715044021606, + "confidence": 0.258, + "words": [ + { + "text": "...sun.", + "start": 78.04, + "end": 78.24, + "confidence": 0.258 + } + ] + }, + { + "id": 27, + "seek": 8300, + "start": 88.8, + "end": 88.82, + "text": " Bye.", + "tokens": [ + 50364, + 4621, + 13, + 50666 + ], + "temperature": 0.1, + "avg_logprob": -1.2146873474121094, + "compression_ratio": 0.3333333333333333, + "no_speech_prob": 0.596446692943573, + "confidence": 0.068, + "words": [ + { + "text": "Bye.", + "start": 88.8, + "end": 88.82, + "confidence": 0.068 + } + ] + } + ], + "language": "English" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/random_apollo11.mp3.words.json b/tests/expected/corner_cases/random_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..6a4d9239f60c9a2836afbe358030673ae7cfe93b --- /dev/null +++ b/tests/expected/corner_cases/random_apollo11.mp3.words.json @@ -0,0 +1,1871 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-EA GLEME GVA. Alright, okay. Yeah, sir. We like to see you guys. I'll put that camera there. They make it want to go on the helmet. We're going to have a B1. And you can put the other one on the mic helmet with those GVA. Alright, that's a bit on the helmet. I got a bit on the helmet. The other one's mic down. Mic going to the leak, we say. We got him in there helmet bag. And we got him in the helmet bag. We're taking the leak on him. Yeah, we're taking the leak on him. Yeah, we were going to hang me on it. We were going to hang you on it with the cover on the tractor. Okay, fine. We weren't sure of that. Just a suggestion. We thought we'd say you could check it out. It's not much of a hard to turn to. So I guess we're going to come up with this. Let us know. Okay, no problem. 
We'll let it know by the end of the night.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.52, + "end": 6.54, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-EA GLEME GVA.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 15454, + 460, + 20914, + 13, + 50714 + ], + "temperature": 0.2, + "avg_logprob": -0.6873873496542171, + "compression_ratio": 1.4019607843137254, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.549, + "words": [ + { + "text": "Apollo", + "start": 0.52, + "end": 0.88, + "confidence": 0.156 + }, + { + "text": "11,", + "start": 0.88, + "end": 1.26, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.72, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.72, + "end": 1.94, + "confidence": 0.518 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.824 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.26, + "end": 2.86, + "confidence": 0.968 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.947 + }, + { + "text": "you", + "start": 3.44, + "end": 3.6, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.6, + "end": 3.72, + "confidence": 0.909 + }, + { + "text": "your", + "start": 3.72, + "end": 3.92, + "confidence": 0.972 + }, + { + "text": "Soyuz-EA", + "start": 3.92, + "end": 5.26, + "confidence": 0.32 + }, + { + "text": "GLEME", + "start": 5.26, + "end": 5.74, + "confidence": 0.558 + }, + { + "text": "GVA.", + "start": 5.74, + "end": 6.54, + "confidence": 0.336 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 12.22, + "text": " Alright, okay.", + "tokens": [ + 50714, + 2798, + 11, + 1392, + 13, + 50964 + ], + "temperature": 0.2, + "avg_logprob": -0.6873873496542171, + "compression_ratio": 
1.4019607843137254, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.388, + "words": [ + { + "text": "Alright,", + "start": 10.8, + "end": 11.04, + "confidence": 0.295 + }, + { + "text": "okay.", + "start": 11.68, + "end": 12.22, + "confidence": 0.511 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.5, + "end": 13.36, + "text": " Yeah, sir.", + "tokens": [ + 50964, + 865, + 11, + 4735, + 13, + 51014 + ], + "temperature": 0.2, + "avg_logprob": -0.6873873496542171, + "compression_ratio": 1.4019607843137254, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.151, + "words": [ + { + "text": "Yeah,", + "start": 12.5, + "end": 12.96, + "confidence": 0.162 + }, + { + "text": "sir.", + "start": 13.34, + "end": 13.36, + "confidence": 0.141 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 13.36, + "end": 14.22, + "text": " We like to see you guys.", + "tokens": [ + 51014, + 492, + 411, + 281, + 536, + 291, + 1074, + 13, + 51064 + ], + "temperature": 0.2, + "avg_logprob": -0.6873873496542171, + "compression_ratio": 1.4019607843137254, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.289, + "words": [ + { + "text": "We", + "start": 13.36, + "end": 13.38, + "confidence": 0.254 + }, + { + "text": "like", + "start": 13.38, + "end": 13.4, + "confidence": 0.398 + }, + { + "text": "to", + "start": 13.4, + "end": 13.56, + "confidence": 0.296 + }, + { + "text": "see", + "start": 13.56, + "end": 14.0, + "confidence": 0.18 + }, + { + "text": "you", + "start": 14.0, + "end": 14.08, + "confidence": 0.151 + }, + { + "text": "guys.", + "start": 14.08, + "end": 14.22, + "confidence": 0.714 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 14.22, + "end": 14.6, + "text": " I'll put that camera there.", + "tokens": [ + 51064, + 286, + 603, + 829, + 300, + 2799, + 456, + 13, + 51114 + ], + "temperature": 0.2, + "avg_logprob": -0.6873873496542171, + "compression_ratio": 1.4019607843137254, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.338, + 
"words": [ + { + "text": "I'll", + "start": 14.22, + "end": 14.24, + "confidence": 0.23 + }, + { + "text": "put", + "start": 14.24, + "end": 14.26, + "confidence": 0.236 + }, + { + "text": "that", + "start": 14.26, + "end": 14.28, + "confidence": 0.846 + }, + { + "text": "camera", + "start": 14.28, + "end": 14.48, + "confidence": 0.457 + }, + { + "text": "there.", + "start": 14.48, + "end": 14.6, + "confidence": 0.306 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 14.6, + "end": 17.17, + "text": " They make it want to go on the helmet.", + "tokens": [ + 51114, + 814, + 652, + 309, + 528, + 281, + 352, + 322, + 264, + 15922, + 13, + 51214 + ], + "temperature": 0.2, + "avg_logprob": -0.6873873496542171, + "compression_ratio": 1.4019607843137254, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.561, + "words": [ + { + "text": "They", + "start": 14.6, + "end": 15.32, + "confidence": 0.319 + }, + { + "text": "make", + "start": 15.32, + "end": 15.68, + "confidence": 0.408 + }, + { + "text": "it", + "start": 15.68, + "end": 15.86, + "confidence": 0.285 + }, + { + "text": "want", + "start": 15.86, + "end": 16.06, + "confidence": 0.279 + }, + { + "text": "to", + "start": 16.06, + "end": 16.2, + "confidence": 0.978 + }, + { + "text": "go", + "start": 16.2, + "end": 16.38, + "confidence": 0.901 + }, + { + "text": "on", + "start": 16.38, + "end": 16.6, + "confidence": 0.952 + }, + { + "text": "the", + "start": 16.6, + "end": 16.78, + "confidence": 0.867 + }, + { + "text": "helmet.", + "start": 16.78, + "end": 17.17, + "confidence": 0.731 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 17.17, + "end": 19.08, + "text": " We're going to have a B1.", + "tokens": [ + 51214, + 492, + 434, + 516, + 281, + 362, + 257, + 363, + 16, + 13, + 51314 + ], + "temperature": 0.2, + "avg_logprob": -0.6873873496542171, + "compression_ratio": 1.4019607843137254, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.676, + "words": [ + { + "text": "We're", + "start": 17.17, 
+ "end": 17.76, + "confidence": 0.556 + }, + { + "text": "going", + "start": 17.76, + "end": 17.92, + "confidence": 0.623 + }, + { + "text": "to", + "start": 17.92, + "end": 17.98, + "confidence": 0.993 + }, + { + "text": "have", + "start": 17.98, + "end": 18.2, + "confidence": 0.955 + }, + { + "text": "a", + "start": 18.2, + "end": 18.36, + "confidence": 0.351 + }, + { + "text": "B1.", + "start": 18.36, + "end": 19.08, + "confidence": 0.825 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.36, + "end": 23.74, + "text": " And you can put the other one on the mic helmet with those GVA.", + "tokens": [ + 51314, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 13, + 51564 + ], + "temperature": 0.2, + "avg_logprob": -0.6873873496542171, + "compression_ratio": 1.4019607843137254, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.651, + "words": [ + { + "text": "And", + "start": 19.36, + "end": 19.62, + "confidence": 0.49 + }, + { + "text": "you", + "start": 19.62, + "end": 20.32, + "confidence": 0.918 + }, + { + "text": "can", + "start": 20.32, + "end": 20.48, + "confidence": 0.781 + }, + { + "text": "put", + "start": 20.48, + "end": 20.64, + "confidence": 0.991 + }, + { + "text": "the", + "start": 20.64, + "end": 20.84, + "confidence": 0.986 + }, + { + "text": "other", + "start": 20.84, + "end": 21.0, + "confidence": 0.998 + }, + { + "text": "one", + "start": 21.0, + "end": 21.18, + "confidence": 0.981 + }, + { + "text": "on", + "start": 21.18, + "end": 21.42, + "confidence": 0.992 + }, + { + "text": "the", + "start": 21.42, + "end": 21.96, + "confidence": 0.633 + }, + { + "text": "mic", + "start": 21.96, + "end": 22.48, + "confidence": 0.438 + }, + { + "text": "helmet", + "start": 22.48, + "end": 22.8, + "confidence": 0.951 + }, + { + "text": "with", + "start": 22.8, + "end": 23.06, + "confidence": 0.354 + }, + { + "text": "those", + "start": 23.06, + "end": 23.3, + "confidence": 0.396 + }, + 
{ + "text": "GVA.", + "start": 23.3, + "end": 23.74, + "confidence": 0.36 + } + ] + }, + { + "id": 8, + "seek": 2400, + "start": 27.38, + "end": 31.2, + "text": " Alright, that's a bit on the helmet.", + "tokens": [ + 50364, + 2798, + 11, + 300, + 311, + 257, + 857, + 322, + 264, + 15922, + 13, + 50714 + ], + "temperature": 0.2, + "avg_logprob": -0.7881743211012621, + "compression_ratio": 1.55, + "no_speech_prob": 0.18406571447849274, + "confidence": 0.362, + "words": [ + { + "text": "Alright,", + "start": 27.38, + "end": 27.4, + "confidence": 0.087 + }, + { + "text": "that's", + "start": 29.36, + "end": 29.4, + "confidence": 0.285 + }, + { + "text": "a", + "start": 29.4, + "end": 29.42, + "confidence": 0.404 + }, + { + "text": "bit", + "start": 29.42, + "end": 29.88, + "confidence": 0.419 + }, + { + "text": "on", + "start": 29.88, + "end": 30.48, + "confidence": 0.709 + }, + { + "text": "the", + "start": 30.48, + "end": 31.18, + "confidence": 0.344 + }, + { + "text": "helmet.", + "start": 31.18, + "end": 31.2, + "confidence": 0.996 + } + ] + }, + { + "id": 9, + "seek": 2400, + "start": 31.34, + "end": 34.2, + "text": " I got a bit on the helmet.", + "tokens": [ + 50714, + 286, + 658, + 257, + 857, + 322, + 264, + 15922, + 13, + 50864 + ], + "temperature": 0.2, + "avg_logprob": -0.7881743211012621, + "compression_ratio": 1.55, + "no_speech_prob": 0.18406571447849274, + "confidence": 0.56, + "words": [ + { + "text": "I", + "start": 31.34, + "end": 31.54, + "confidence": 0.234 + }, + { + "text": "got", + "start": 31.54, + "end": 31.84, + "confidence": 0.766 + }, + { + "text": "a", + "start": 31.84, + "end": 32.12, + "confidence": 0.372 + }, + { + "text": "bit", + "start": 32.12, + "end": 33.46, + "confidence": 0.407 + }, + { + "text": "on", + "start": 33.46, + "end": 33.88, + "confidence": 0.932 + }, + { + "text": "the", + "start": 33.88, + "end": 34.04, + "confidence": 0.684 + }, + { + "text": "helmet.", + "start": 34.04, + "end": 34.2, + "confidence": 0.998 + } + ] 
+ }, + { + "id": 10, + "seek": 2400, + "start": 34.24, + "end": 36.38, + "text": " The other one's mic down.", + "tokens": [ + 50864, + 440, + 661, + 472, + 311, + 3123, + 760, + 13, + 51014 + ], + "temperature": 0.2, + "avg_logprob": -0.7881743211012621, + "compression_ratio": 1.55, + "no_speech_prob": 0.18406571447849274, + "confidence": 0.352, + "words": [ + { + "text": "The", + "start": 34.24, + "end": 34.6, + "confidence": 0.155 + }, + { + "text": "other", + "start": 34.6, + "end": 35.58, + "confidence": 0.945 + }, + { + "text": "one's", + "start": 35.58, + "end": 35.86, + "confidence": 0.432 + }, + { + "text": "mic", + "start": 35.86, + "end": 36.06, + "confidence": 0.238 + }, + { + "text": "down.", + "start": 36.06, + "end": 36.38, + "confidence": 0.295 + } + ] + }, + { + "id": 11, + "seek": 2400, + "start": 37.6, + "end": 39.18, + "text": " Mic going to the leak, we say.", + "tokens": [ + 51014, + 5818, + 516, + 281, + 264, + 17143, + 11, + 321, + 584, + 13, + 51114 + ], + "temperature": 0.2, + "avg_logprob": -0.7881743211012621, + "compression_ratio": 1.55, + "no_speech_prob": 0.18406571447849274, + "confidence": 0.452, + "words": [ + { + "text": "Mic", + "start": 37.6, + "end": 37.86, + "confidence": 0.354 + }, + { + "text": "going", + "start": 37.86, + "end": 38.12, + "confidence": 0.652 + }, + { + "text": "to", + "start": 38.12, + "end": 38.36, + "confidence": 0.336 + }, + { + "text": "the", + "start": 38.36, + "end": 38.48, + "confidence": 0.469 + }, + { + "text": "leak,", + "start": 38.48, + "end": 38.76, + "confidence": 0.165 + }, + { + "text": "we", + "start": 38.94, + "end": 38.96, + "confidence": 0.862 + }, + { + "text": "say.", + "start": 38.96, + "end": 39.18, + "confidence": 0.748 + } + ] + }, + { + "id": 12, + "seek": 2400, + "start": 39.88, + "end": 41.94, + "text": " We got him in there helmet bag.", + "tokens": [ + 51114, + 492, + 658, + 796, + 294, + 456, + 15922, + 3411, + 13, + 51264 + ], + "temperature": 0.2, + "avg_logprob": 
-0.7881743211012621, + "compression_ratio": 1.55, + "no_speech_prob": 0.18406571447849274, + "confidence": 0.46, + "words": [ + { + "text": "We", + "start": 39.88, + "end": 40.2, + "confidence": 0.545 + }, + { + "text": "got", + "start": 40.2, + "end": 40.48, + "confidence": 0.48 + }, + { + "text": "him", + "start": 40.48, + "end": 40.64, + "confidence": 0.434 + }, + { + "text": "in", + "start": 40.64, + "end": 40.76, + "confidence": 0.886 + }, + { + "text": "there", + "start": 40.76, + "end": 40.98, + "confidence": 0.744 + }, + { + "text": "helmet", + "start": 40.98, + "end": 41.52, + "confidence": 0.288 + }, + { + "text": "bag.", + "start": 41.52, + "end": 41.94, + "confidence": 0.204 + } + ] + }, + { + "id": 13, + "seek": 2400, + "start": 42.46, + "end": 47.42, + "text": " And we got him in the helmet bag.", + "tokens": [ + 51264, + 400, + 321, + 658, + 796, + 294, + 264, + 15922, + 3411, + 13, + 51564 + ], + "temperature": 0.2, + "avg_logprob": -0.7881743211012621, + "compression_ratio": 1.55, + "no_speech_prob": 0.18406571447849274, + "confidence": 0.413, + "words": [ + { + "text": "And", + "start": 42.46, + "end": 43.34, + "confidence": 0.574 + }, + { + "text": "we", + "start": 43.34, + "end": 44.0, + "confidence": 0.309 + }, + { + "text": "got", + "start": 44.0, + "end": 44.18, + "confidence": 0.29 + }, + { + "text": "him", + "start": 44.18, + "end": 44.48, + "confidence": 0.3 + }, + { + "text": "in", + "start": 44.48, + "end": 44.52, + "confidence": 0.815 + }, + { + "text": "the", + "start": 44.52, + "end": 44.64, + "confidence": 0.394 + }, + { + "text": "helmet", + "start": 44.64, + "end": 46.46, + "confidence": 0.194 + }, + { + "text": "bag.", + "start": 46.46, + "end": 47.42, + "confidence": 0.887 + } + ] + }, + { + "id": 14, + "seek": 4800, + "start": 51.48, + "end": 52.94, + "text": " We're taking the leak on him.", + "tokens": [ + 50364, + 492, + 434, + 1940, + 264, + 17143, + 322, + 796, + 13, + 50614 + ], + "temperature": 0.2, + "avg_logprob": 
-0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.306, + "words": [ + { + "text": "We're", + "start": 51.48, + "end": 51.68, + "confidence": 0.246 + }, + { + "text": "taking", + "start": 51.68, + "end": 51.92, + "confidence": 0.313 + }, + { + "text": "the", + "start": 51.92, + "end": 52.54, + "confidence": 0.296 + }, + { + "text": "leak", + "start": 52.54, + "end": 52.56, + "confidence": 0.249 + }, + { + "text": "on", + "start": 52.56, + "end": 52.86, + "confidence": 0.23 + }, + { + "text": "him.", + "start": 52.86, + "end": 52.94, + "confidence": 0.78 + } + ] + }, + { + "id": 15, + "seek": 4800, + "start": 53.06, + "end": 54.36, + "text": " Yeah, we're taking the leak on him.", + "tokens": [ + 50614, + 865, + 11, + 321, + 434, + 1940, + 264, + 17143, + 322, + 796, + 13, + 50714 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.911, + "words": [ + { + "text": "Yeah,", + "start": 53.06, + "end": 53.26, + "confidence": 0.764 + }, + { + "text": "we're", + "start": 53.34, + "end": 53.4, + "confidence": 0.928 + }, + { + "text": "taking", + "start": 53.4, + "end": 53.62, + "confidence": 0.971 + }, + { + "text": "the", + "start": 53.62, + "end": 53.78, + "confidence": 0.91 + }, + { + "text": "leak", + "start": 53.78, + "end": 53.86, + "confidence": 0.992 + }, + { + "text": "on", + "start": 53.86, + "end": 54.18, + "confidence": 0.828 + }, + { + "text": "him.", + "start": 54.18, + "end": 54.36, + "confidence": 0.994 + } + ] + }, + { + "id": 16, + "seek": 4800, + "start": 56.58, + "end": 57.86, + "text": " Yeah, we were going to hang me on it.", + "tokens": [ + 50714, + 865, + 11, + 321, + 645, + 516, + 281, + 3967, + 385, + 322, + 309, + 13, + 50864 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 
0.14940056204795837, + "confidence": 0.419, + "words": [ + { + "text": "Yeah,", + "start": 56.58, + "end": 56.6, + "confidence": 0.142 + }, + { + "text": "we", + "start": 56.66, + "end": 56.68, + "confidence": 0.314 + }, + { + "text": "were", + "start": 56.68, + "end": 56.84, + "confidence": 0.437 + }, + { + "text": "going", + "start": 56.84, + "end": 57.08, + "confidence": 0.377 + }, + { + "text": "to", + "start": 57.08, + "end": 57.22, + "confidence": 0.983 + }, + { + "text": "hang", + "start": 57.22, + "end": 57.38, + "confidence": 0.609 + }, + { + "text": "me", + "start": 57.38, + "end": 57.58, + "confidence": 0.379 + }, + { + "text": "on", + "start": 57.58, + "end": 57.72, + "confidence": 0.819 + }, + { + "text": "it.", + "start": 57.72, + "end": 57.86, + "confidence": 0.292 + } + ] + }, + { + "id": 17, + "seek": 4800, + "start": 57.86, + "end": 61.48, + "text": " We were going to hang you on it with the cover on the tractor.", + "tokens": [ + 50864, + 492, + 645, + 516, + 281, + 3967, + 291, + 322, + 309, + 365, + 264, + 2060, + 322, + 264, + 31857, + 13, + 51064 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.54, + "words": [ + { + "text": "We", + "start": 57.86, + "end": 58.02, + "confidence": 0.506 + }, + { + "text": "were", + "start": 58.02, + "end": 58.4, + "confidence": 0.595 + }, + { + "text": "going", + "start": 58.4, + "end": 58.44, + "confidence": 0.945 + }, + { + "text": "to", + "start": 58.44, + "end": 58.6, + "confidence": 0.995 + }, + { + "text": "hang", + "start": 58.6, + "end": 58.72, + "confidence": 0.966 + }, + { + "text": "you", + "start": 58.72, + "end": 58.9, + "confidence": 0.597 + }, + { + "text": "on", + "start": 58.9, + "end": 59.16, + "confidence": 0.791 + }, + { + "text": "it", + "start": 59.16, + "end": 59.52, + "confidence": 0.866 + }, + { + "text": "with", + "start": 59.52, + "end": 60.12, + "confidence": 0.524 + 
}, + { + "text": "the", + "start": 60.12, + "end": 60.28, + "confidence": 0.573 + }, + { + "text": "cover", + "start": 60.28, + "end": 61.0, + "confidence": 0.876 + }, + { + "text": "on", + "start": 61.0, + "end": 61.22, + "confidence": 0.462 + }, + { + "text": "the", + "start": 61.22, + "end": 61.32, + "confidence": 0.27 + }, + { + "text": "tractor.", + "start": 61.32, + "end": 61.48, + "confidence": 0.049 + } + ] + }, + { + "id": 18, + "seek": 4800, + "start": 62.52, + "end": 63.03, + "text": " Okay, fine.", + "tokens": [ + 51064, + 1033, + 11, + 2489, + 13, + 51114 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.883, + "words": [ + { + "text": "Okay,", + "start": 62.52, + "end": 62.78, + "confidence": 0.87 + }, + { + "text": "fine.", + "start": 62.88, + "end": 63.03, + "confidence": 0.897 + } + ] + }, + { + "id": 19, + "seek": 4800, + "start": 63.03, + "end": 63.98, + "text": " We weren't sure of that.", + "tokens": [ + 51114, + 492, + 4999, + 380, + 988, + 295, + 300, + 13, + 51164 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.873, + "words": [ + { + "text": "We", + "start": 63.03, + "end": 63.28, + "confidence": 0.878 + }, + { + "text": "weren't", + "start": 63.28, + "end": 63.54, + "confidence": 0.977 + }, + { + "text": "sure", + "start": 63.54, + "end": 63.72, + "confidence": 0.973 + }, + { + "text": "of", + "start": 63.72, + "end": 63.84, + "confidence": 0.549 + }, + { + "text": "that.", + "start": 63.84, + "end": 63.98, + "confidence": 0.989 + } + ] + }, + { + "id": 20, + "seek": 4800, + "start": 64.14, + "end": 65.1, + "text": " Just a suggestion.", + "tokens": [ + 51164, + 1449, + 257, + 16541, + 13, + 51214 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + 
"no_speech_prob": 0.14940056204795837, + "confidence": 0.742, + "words": [ + { + "text": "Just", + "start": 64.14, + "end": 64.56, + "confidence": 0.489 + }, + { + "text": "a", + "start": 64.56, + "end": 64.72, + "confidence": 0.838 + }, + { + "text": "suggestion.", + "start": 64.72, + "end": 65.1, + "confidence": 0.997 + } + ] + }, + { + "id": 21, + "seek": 4800, + "start": 65.2, + "end": 67.7, + "text": " We thought we'd say you could check it out.", + "tokens": [ + 51214, + 492, + 1194, + 321, + 1116, + 584, + 291, + 727, + 1520, + 309, + 484, + 13, + 51364 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.697, + "words": [ + { + "text": "We", + "start": 65.2, + "end": 65.4, + "confidence": 0.885 + }, + { + "text": "thought", + "start": 65.4, + "end": 65.54, + "confidence": 0.979 + }, + { + "text": "we'd", + "start": 65.54, + "end": 65.9, + "confidence": 0.576 + }, + { + "text": "say", + "start": 65.9, + "end": 66.06, + "confidence": 0.275 + }, + { + "text": "you", + "start": 66.06, + "end": 67.08, + "confidence": 0.928 + }, + { + "text": "could", + "start": 67.08, + "end": 67.22, + "confidence": 0.728 + }, + { + "text": "check", + "start": 67.22, + "end": 67.38, + "confidence": 0.513 + }, + { + "text": "it", + "start": 67.38, + "end": 67.54, + "confidence": 0.991 + }, + { + "text": "out.", + "start": 67.54, + "end": 67.7, + "confidence": 0.998 + } + ] + }, + { + "id": 22, + "seek": 4800, + "start": 68.2, + "end": 69.22, + "text": " It's not much of a hard to turn to.", + "tokens": [ + 51364, + 467, + 311, + 406, + 709, + 295, + 257, + 1152, + 281, + 1261, + 281, + 13, + 51414 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.467, + "words": [ + { + "text": "It's", + "start": 68.2, + "end": 68.3, + "confidence": 0.373 + }, + { + 
"text": "not", + "start": 68.3, + "end": 68.4, + "confidence": 0.822 + }, + { + "text": "much", + "start": 68.4, + "end": 68.56, + "confidence": 0.939 + }, + { + "text": "of", + "start": 68.56, + "end": 68.7, + "confidence": 0.638 + }, + { + "text": "a", + "start": 68.7, + "end": 68.72, + "confidence": 0.67 + }, + { + "text": "hard", + "start": 68.72, + "end": 68.84, + "confidence": 0.173 + }, + { + "text": "to", + "start": 68.84, + "end": 68.98, + "confidence": 0.382 + }, + { + "text": "turn", + "start": 68.98, + "end": 69.1, + "confidence": 0.696 + }, + { + "text": "to.", + "start": 69.1, + "end": 69.22, + "confidence": 0.232 + } + ] + }, + { + "id": 23, + "seek": 4800, + "start": 69.22, + "end": 71.92, + "text": " So I guess we're going to come up with this.", + "tokens": [ + 51414, + 407, + 286, + 2041, + 321, + 434, + 516, + 281, + 808, + 493, + 365, + 341, + 13, + 51564 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.742, + "words": [ + { + "text": "So", + "start": 69.22, + "end": 69.56, + "confidence": 0.656 + }, + { + "text": "I", + "start": 69.56, + "end": 70.48, + "confidence": 0.493 + }, + { + "text": "guess", + "start": 70.48, + "end": 70.62, + "confidence": 0.992 + }, + { + "text": "we're", + "start": 70.62, + "end": 71.08, + "confidence": 0.73 + }, + { + "text": "going", + "start": 71.08, + "end": 71.2, + "confidence": 0.883 + }, + { + "text": "to", + "start": 71.2, + "end": 71.26, + "confidence": 0.993 + }, + { + "text": "come", + "start": 71.26, + "end": 71.4, + "confidence": 0.903 + }, + { + "text": "up", + "start": 71.4, + "end": 71.6, + "confidence": 0.831 + }, + { + "text": "with", + "start": 71.6, + "end": 71.76, + "confidence": 0.966 + }, + { + "text": "this.", + "start": 71.76, + "end": 71.92, + "confidence": 0.345 + } + ] + }, + { + "id": 24, + "seek": 4800, + "start": 71.92, + "end": 72.44, + "text": " Let us know.", + 
"tokens": [ + 51564, + 961, + 505, + 458, + 13, + 51614 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.896, + "words": [ + { + "text": "Let", + "start": 71.92, + "end": 72.1, + "confidence": 0.726 + }, + { + "text": "us", + "start": 72.1, + "end": 72.24, + "confidence": 0.993 + }, + { + "text": "know.", + "start": 72.24, + "end": 72.44, + "confidence": 0.999 + } + ] + }, + { + "id": 25, + "seek": 4800, + "start": 74.18, + "end": 75.14, + "text": " Okay, no problem.", + "tokens": [ + 51614, + 1033, + 11, + 572, + 1154, + 13, + 51714 + ], + "temperature": 0.2, + "avg_logprob": -0.4506545960903168, + "compression_ratio": 1.766355140186916, + "no_speech_prob": 0.14940056204795837, + "confidence": 0.838, + "words": [ + { + "text": "Okay,", + "start": 74.18, + "end": 74.46, + "confidence": 0.687 + }, + { + "text": "no", + "start": 74.58, + "end": 74.82, + "confidence": 0.878 + }, + { + "text": "problem.", + "start": 74.82, + "end": 75.14, + "confidence": 0.975 + } + ] + }, + { + "id": 26, + "seek": 7500, + "start": 76.56, + "end": 78.24, + "text": " We'll let it know by the end of the night.", + "tokens": [ + 50364, + 492, + 603, + 718, + 309, + 458, + 538, + 264, + 917, + 295, + 264, + 1818, + 13, + 50514 + ], + "temperature": 0.2, + "avg_logprob": -0.6192415873209636, + "compression_ratio": 0.9130434782608695, + "no_speech_prob": 0.813217043876648, + "confidence": 0.532, + "words": [ + { + "text": "We'll", + "start": 76.56, + "end": 76.8, + "confidence": 0.479 + }, + { + "text": "let", + "start": 76.8, + "end": 76.92, + "confidence": 0.593 + }, + { + "text": "it", + "start": 76.92, + "end": 76.94, + "confidence": 0.429 + }, + { + "text": "know", + "start": 76.94, + "end": 77.18, + "confidence": 0.46 + }, + { + "text": "by", + "start": 77.18, + "end": 77.32, + "confidence": 0.375 + }, + { + "text": "the", + "start": 77.32, + "end": 77.4, + "confidence": 
0.972 + }, + { + "text": "end", + "start": 77.4, + "end": 77.5, + "confidence": 0.993 + }, + { + "text": "of", + "start": 77.5, + "end": 77.64, + "confidence": 0.989 + }, + { + "text": "the", + "start": 77.64, + "end": 78.14, + "confidence": 0.417 + }, + { + "text": "night.", + "start": 78.14, + "end": 78.24, + "confidence": 0.245 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/stucked_lm_apollo11.mp3.words.json b/tests/expected/corner_cases/stucked_lm_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..1d7ba1e89ba7ab0a27ab9ce488370cd869193567 --- /dev/null +++ b/tests/expected/corner_cases/stucked_lm_apollo11.mp3.words.json @@ -0,0 +1,3820 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. 
Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.52, + "end": 6.54, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.7220700916491056, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.541, + "words": [ + { + "text": "Apollo", + "start": 0.52, + "end": 0.88, + "confidence": 0.156 + }, + { + "text": "11,", + "start": 0.88, + "end": 1.26, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.72, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.72, + "end": 1.94, + "confidence": 0.518 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.824 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.26, + "end": 2.86, + "confidence": 0.968 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.947 + }, + { + "text": "you", + "start": 3.44, + "end": 3.6, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.6, + "end": 3.72, + "confidence": 0.909 + }, + { + "text": "your", + "start": 3.72, + "end": 3.92, + "confidence": 0.972 + }, + { + "text": "Soyuz-VA", + "start": 3.92, + "end": 5.16, + "confidence": 0.26 + }, + { + "text": "GLEME", + "start": 5.16, + "end": 5.74, + "confidence": 0.474 + }, + { + "text": "GVA.", + "start": 5.74, + "end": 6.54, + "confidence": 0.435 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 19.06, + "text": 
" Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 50714, + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.7220700916491056, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.467, + "words": [ + { + "text": "Alright,", + "start": 10.8, + "end": 11.04, + "confidence": 0.308 + }, + { + "text": "okay,", + "start": 11.68, + "end": 12.22, + "confidence": 0.507 + }, + { + "text": "we", + "start": 12.52, + "end": 12.96, + "confidence": 0.609 + }, + { + "text": "like", + "start": 12.96, + "end": 13.28, + "confidence": 0.501 + }, + { + "text": "to", + "start": 13.28, + "end": 13.54, + "confidence": 0.265 + }, + { + "text": "say", + "start": 13.54, + "end": 14.9, + "confidence": 0.138 + }, + { + "text": "that", + "start": 14.9, + "end": 15.38, + "confidence": 0.199 + }, + { + "text": "they", + "start": 15.38, + "end": 15.44, + "confidence": 0.418 + }, + { + "text": "make", + "start": 15.44, + "end": 15.68, + "confidence": 0.396 + }, + { + "text": "the", + "start": 15.68, + "end": 15.84, + "confidence": 0.252 + }, + { + "text": "one", + "start": 15.84, + "end": 16.06, + "confidence": 0.607 + }, + { + "text": "that's", + "start": 16.06, + "end": 16.28, + "confidence": 0.441 + }, + { + "text": "on", + "start": 16.28, + "end": 16.48, + "confidence": 0.595 + }, + { + "text": "the", + "start": 16.48, + "end": 16.78, + "confidence": 0.872 + }, + { + "text": "helmet", + "start": 16.78, + "end": 17.26, + "confidence": 0.856 + }, + { + "text": "we're", + "start": 17.26, + "end": 17.76, + "confidence": 0.297 + }, + { + "text": "going", + "start": 17.76, + "end": 17.92, + "confidence": 0.599 + }, + { + "text": "to", + "start": 17.92, + "end": 18.06, + "confidence": 0.823 + }, + 
{ + "text": "have", + "start": 18.06, + "end": 18.2, + "confidence": 0.838 + }, + { + "text": "in", + "start": 18.2, + "end": 18.36, + "confidence": 0.717 + }, + { + "text": "B1.", + "start": 18.36, + "end": 19.06, + "confidence": 0.766 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 19.36, + "end": 24.52, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 51314, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13, + 51614 + ], + "temperature": 0.0, + "avg_logprob": -0.7220700916491056, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.486, + "words": [ + { + "text": "And", + "start": 19.36, + "end": 20.2, + "confidence": 0.669 + }, + { + "text": "you", + "start": 20.2, + "end": 20.32, + "confidence": 0.948 + }, + { + "text": "can", + "start": 20.32, + "end": 20.48, + "confidence": 0.731 + }, + { + "text": "put", + "start": 20.48, + "end": 20.64, + "confidence": 0.981 + }, + { + "text": "the", + "start": 20.64, + "end": 20.84, + "confidence": 0.989 + }, + { + "text": "other", + "start": 20.84, + "end": 21.0, + "confidence": 0.991 + }, + { + "text": "one", + "start": 21.0, + "end": 21.18, + "confidence": 0.978 + }, + { + "text": "on", + "start": 21.18, + "end": 21.4, + "confidence": 0.989 + }, + { + "text": "the", + "start": 21.4, + "end": 21.94, + "confidence": 0.522 + }, + { + "text": "mic", + "start": 21.94, + "end": 22.48, + "confidence": 0.414 + }, + { + "text": "helmet", + "start": 22.48, + "end": 22.8, + "confidence": 0.883 + }, + { + "text": "with", + "start": 22.8, + "end": 23.06, + "confidence": 0.426 + }, + { + "text": "those", + "start": 23.06, + "end": 23.3, + "confidence": 0.466 + }, + { + "text": "GVA", + "start": 23.3, + "end": 23.74, + "confidence": 0.216 + }, + { + "text": "blizzard", + "start": 23.74, + "end": 24.18, + "confidence": 0.108 + }, + { + 
"text": "frames.", + "start": 24.18, + "end": 24.52, + "confidence": 0.256 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 31.34, + "end": 54.98, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, 
+ 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.11161944071451822, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.42649850249290466, + "confidence": 0.93, + "words": [ + { + "text": "Alright,", + "start": 31.34, + "end": 31.52, + "confidence": 0.066 + }, + { + "text": "got", + "start": 31.82, + "end": 31.84, + "confidence": 0.334 + }, + { + "text": "them,", + "start": 31.84, + "end": 32.12, + "confidence": 0.274 + }, + { + "text": "got", + "start": 32.38, + "end": 32.9, + "confidence": 0.568 + }, + { + "text": "them,", + "start": 32.9, + "end": 33.46, + "confidence": 0.941 + }, + { + "text": "got", + "start": 33.74, + "end": 33.76, + "confidence": 0.58 + }, + { + "text": "them,", + "start": 33.76, + "end": 33.78, + "confidence": 0.925 + }, + { + "text": "got", + "start": 33.78, + "end": 33.8, + "confidence": 0.521 + }, + { + "text": "them,", + "start": 33.8, + "end": 33.9, + "confidence": 0.906 + }, + { + "text": "got", + "start": 34.12, + "end": 34.14, + "confidence": 0.635 + }, + { + "text": "them,", + "start": 34.14, + "end": 34.5, + "confidence": 0.919 + }, + { + "text": "got", + "start": 34.5, + "end": 34.6, + "confidence": 0.69 + }, + { + "text": "them,", + "start": 34.6, + "end": 34.62, + "confidence": 0.946 + }, + { + "text": "got", + "start": 34.62, + "end": 34.64, + "confidence": 0.772 + }, + { + "text": "them,", + "start": 34.64, + "end": 34.66, + "confidence": 0.967 + }, + { + "text": "got", + "start": 34.66, + "end": 34.68, + "confidence": 0.856 + }, + { + "text": "them,", + "start": 34.68, + "end": 34.7, + 
"confidence": 0.975 + }, + { + "text": "got", + "start": 34.82, + "end": 34.92, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 34.92, + "end": 34.94, + "confidence": 0.981 + }, + { + "text": "got", + "start": 34.94, + "end": 34.96, + "confidence": 0.909 + }, + { + "text": "them,", + "start": 34.96, + "end": 35.1, + "confidence": 0.985 + }, + { + "text": "got", + "start": 35.1, + "end": 35.28, + "confidence": 0.931 + }, + { + "text": "them,", + "start": 35.28, + "end": 35.62, + "confidence": 0.988 + }, + { + "text": "got", + "start": 35.78, + "end": 35.8, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 35.8, + "end": 35.82, + "confidence": 0.988 + }, + { + "text": "got", + "start": 36.02, + "end": 36.04, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 36.04, + "end": 36.06, + "confidence": 0.988 + }, + { + "text": "got", + "start": 36.06, + "end": 36.08, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 36.08, + "end": 36.1, + "confidence": 0.99 + }, + { + "text": "got", + "start": 36.1, + "end": 36.12, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 36.12, + "end": 36.14, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.14, + "end": 36.16, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 36.16, + "end": 36.18, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.18, + "end": 36.34, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 36.34, + "end": 36.74, + "confidence": 0.992 + }, + { + "text": "got", + "start": 36.74, + "end": 37.46, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 37.46, + "end": 37.82, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.82, + "end": 37.84, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.84, + "end": 38.12, + "confidence": 0.992 + }, + { + "text": "got", + "start": 38.12, + "end": 38.14, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 38.14, + "end": 38.5, + "confidence": 
0.992 + }, + { + "text": "got", + "start": 38.5, + "end": 38.52, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 38.52, + "end": 38.54, + "confidence": 0.992 + }, + { + "text": "got", + "start": 38.54, + "end": 38.56, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 38.56, + "end": 38.58, + "confidence": 0.992 + }, + { + "text": "got", + "start": 38.58, + "end": 38.6, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 38.6, + "end": 38.62, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.62, + "end": 38.64, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.64, + "end": 38.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.66, + "end": 38.68, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 38.68, + "end": 38.7, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.7, + "end": 38.72, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.72, + "end": 38.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.74, + "end": 38.76, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.76, + "end": 38.78, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.78, + "end": 38.8, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.8, + "end": 38.82, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.82, + "end": 38.84, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 38.84, + "end": 38.86, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.86, + "end": 38.88, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.88, + "end": 38.9, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.9, + "end": 38.92, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 38.92, + "end": 38.94, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.94, + "end": 38.96, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 38.96, + "end": 38.98, + "confidence": 0.993 + }, + { + 
"text": "got", + "start": 38.98, + "end": 39.0, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 39.0, + "end": 39.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 39.02, + "end": 39.04, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 39.04, + "end": 39.06, + "confidence": 0.993 + }, + { + "text": "got", + "start": 39.06, + "end": 39.08, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 39.08, + "end": 39.1, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.1, + "end": 39.12, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 39.12, + "end": 39.14, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.14, + "end": 39.16, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 39.16, + "end": 39.18, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.18, + "end": 39.2, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 39.2, + "end": 39.22, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.22, + "end": 39.24, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 39.24, + "end": 39.26, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.26, + "end": 39.28, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 39.28, + "end": 39.3, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.3, + "end": 39.32, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 39.32, + "end": 39.34, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.34, + "end": 39.36, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 39.36, + "end": 39.38, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.38, + "end": 39.4, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 39.4, + "end": 39.42, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.42, + "end": 39.44, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 39.44, + "end": 39.46, + "confidence": 0.995 + }, + { + "text": "got", + 
"start": 39.46, + "end": 39.48, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 39.48, + "end": 39.5, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.5, + "end": 39.52, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.52, + "end": 39.54, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.54, + "end": 39.56, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.56, + "end": 39.58, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.58, + "end": 39.6, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.6, + "end": 39.62, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.62, + "end": 39.64, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.64, + "end": 39.66, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.66, + "end": 39.68, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.68, + "end": 39.7, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.7, + "end": 39.72, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.72, + "end": 39.74, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.74, + "end": 39.76, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.76, + "end": 39.78, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.78, + "end": 39.8, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.8, + "end": 39.82, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.82, + "end": 39.84, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.84, + "end": 39.86, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.86, + "end": 39.88, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.88, + "end": 39.9, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.9, + "end": 39.92, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.92, + "end": 39.94, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.94, + 
"end": 39.96, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.96, + "end": 39.98, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.98, + "end": 40.0, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 40.0, + "end": 40.02, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.02, + "end": 40.04, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.04, + "end": 40.06, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.06, + "end": 40.08, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.08, + "end": 40.1, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.1, + "end": 40.12, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.12, + "end": 40.14, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.14, + "end": 40.16, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.16, + "end": 40.18, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.18, + "end": 40.2, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.2, + "end": 40.22, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.22, + "end": 40.46, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.46, + "end": 40.76, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.0, + "end": 41.02, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 41.02, + "end": 41.72, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.9, + "end": 41.92, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 41.92, + "end": 43.0, + "confidence": 0.996 + }, + { + "text": "got", + "start": 43.0, + "end": 44.06, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 44.06, + "end": 44.88, + "confidence": 0.996 + }, + { + "text": "got", + "start": 44.94, + "end": 45.46, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 45.46, + "end": 45.76, + "confidence": 0.996 + }, + { + "text": "got", + "start": 45.76, + "end": 47.14, + 
"confidence": 0.995 + }, + { + "text": "them,", + "start": 47.14, + "end": 47.76, + "confidence": 0.996 + }, + { + "text": "got", + "start": 47.82, + "end": 48.52, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 48.52, + "end": 48.9, + "confidence": 0.996 + }, + { + "text": "got", + "start": 48.9, + "end": 49.2, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 49.2, + "end": 50.8, + "confidence": 0.996 + }, + { + "text": "got", + "start": 51.14, + "end": 51.74, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 51.74, + "end": 52.38, + "confidence": 0.997 + }, + { + "text": "got", + "start": 52.44, + "end": 53.26, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 53.26, + "end": 53.74, + "confidence": 0.997 + }, + { + "text": "got", + "start": 53.76, + "end": 54.2, + "confidence": 0.995 + }, + { + "text": "them", + "start": 54.2, + "end": 54.98, + "confidence": 0.997 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 55.0, + "end": 85.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.05313938722482177, + "compression_ratio": 29.52, + "no_speech_prob": 0.24551986157894135, + "confidence": 0.948, + "words": [ + { + "text": "got", + "start": 55.0, + "end": 55.1, + "confidence": 0.213 + }, + { + "text": "them,", + "start": 55.1, + "end": 55.12, + "confidence": 0.95 + }, + { + "text": "got", + "start": 55.16, + "end": 55.18, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 55.18, + "end": 55.5, + "confidence": 0.997 + }, + { + "text": "got", + "start": 55.56, + "end": 57.36, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 57.36, + "end": 57.52, + "confidence": 0.997 + }, + { + "text": "got", + "start": 57.58, + "end": 57.68, + "confidence": 0.957 + }, 
+ { + "text": "them,", + "start": 57.68, + "end": 57.7, + "confidence": 0.997 + }, + { + "text": "got", + "start": 57.7, + "end": 57.72, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 57.72, + "end": 57.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 57.74, + "end": 57.76, + "confidence": 0.913 + }, + { + "text": "them,", + "start": 57.76, + "end": 57.78, + "confidence": 0.988 + }, + { + "text": "got", + "start": 57.78, + "end": 57.8, + "confidence": 0.873 + }, + { + "text": "them,", + "start": 57.8, + "end": 57.82, + "confidence": 0.982 + }, + { + "text": "got", + "start": 57.82, + "end": 57.84, + "confidence": 0.862 + }, + { + "text": "them,", + "start": 57.84, + "end": 57.86, + "confidence": 0.984 + }, + { + "text": "got", + "start": 57.86, + "end": 57.88, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 57.88, + "end": 57.9, + "confidence": 0.983 + }, + { + "text": "got", + "start": 57.9, + "end": 58.98, + "confidence": 0.851 + }, + { + "text": "them,", + "start": 58.98, + "end": 59.22, + "confidence": 0.924 + }, + { + "text": "got", + "start": 60.5, + "end": 61.26, + "confidence": 0.559 + }, + { + "text": "them,", + "start": 61.26, + "end": 61.94, + "confidence": 0.958 + }, + { + "text": "got", + "start": 61.94, + "end": 61.96, + "confidence": 0.81 + }, + { + "text": "them,", + "start": 61.96, + "end": 61.98, + "confidence": 0.976 + }, + { + "text": "got", + "start": 61.98, + "end": 62.0, + "confidence": 0.844 + }, + { + "text": "them,", + "start": 62.0, + "end": 62.02, + "confidence": 0.977 + }, + { + "text": "got", + "start": 62.02, + "end": 62.04, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 62.04, + "end": 62.06, + "confidence": 0.977 + }, + { + "text": "got", + "start": 62.06, + "end": 62.08, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 62.08, + "end": 62.1, + "confidence": 0.979 + }, + { + "text": "got", + "start": 62.1, + "end": 62.12, + "confidence": 0.853 + }, + { + "text": 
"them,", + "start": 62.12, + "end": 62.14, + "confidence": 0.983 + }, + { + "text": "got", + "start": 62.14, + "end": 62.16, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 62.16, + "end": 62.18, + "confidence": 0.986 + }, + { + "text": "got", + "start": 62.18, + "end": 62.2, + "confidence": 0.893 + }, + { + "text": "them,", + "start": 62.2, + "end": 62.22, + "confidence": 0.985 + }, + { + "text": "got", + "start": 62.22, + "end": 62.24, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 62.24, + "end": 62.26, + "confidence": 0.985 + }, + { + "text": "got", + "start": 62.26, + "end": 62.28, + "confidence": 0.876 + }, + { + "text": "them,", + "start": 62.28, + "end": 62.3, + "confidence": 0.986 + }, + { + "text": "got", + "start": 62.3, + "end": 62.32, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 62.32, + "end": 62.34, + "confidence": 0.987 + }, + { + "text": "got", + "start": 62.34, + "end": 62.36, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 62.36, + "end": 62.38, + "confidence": 0.987 + }, + { + "text": "got", + "start": 62.38, + "end": 62.4, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 62.4, + "end": 62.42, + "confidence": 0.988 + }, + { + "text": "got", + "start": 62.44, + "end": 62.46, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 62.46, + "end": 62.48, + "confidence": 0.989 + }, + { + "text": "got", + "start": 62.54, + "end": 62.78, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 62.78, + "end": 62.8, + "confidence": 0.989 + }, + { + "text": "got", + "start": 62.8, + "end": 62.82, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 62.82, + "end": 62.84, + "confidence": 0.99 + }, + { + "text": "got", + "start": 62.84, + "end": 62.86, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 62.86, + "end": 62.88, + "confidence": 0.99 + }, + { + "text": "got", + "start": 62.88, + "end": 62.9, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 
62.9, + "end": 62.92, + "confidence": 0.991 + }, + { + "text": "got", + "start": 62.92, + "end": 62.94, + "confidence": 0.911 + }, + { + "text": "them,", + "start": 62.94, + "end": 62.96, + "confidence": 0.991 + }, + { + "text": "got", + "start": 62.96, + "end": 62.98, + "confidence": 0.917 + }, + { + "text": "them,", + "start": 62.98, + "end": 63.0, + "confidence": 0.991 + }, + { + "text": "got", + "start": 63.0, + "end": 63.02, + "confidence": 0.92 + }, + { + "text": "them,", + "start": 63.02, + "end": 63.04, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.04, + "end": 63.06, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 63.06, + "end": 63.08, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.08, + "end": 63.1, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 63.1, + "end": 63.12, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.12, + "end": 63.14, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 63.14, + "end": 63.16, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.16, + "end": 63.18, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 63.18, + "end": 63.2, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.2, + "end": 63.22, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 63.22, + "end": 63.24, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.24, + "end": 63.26, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 63.26, + "end": 63.28, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.28, + "end": 63.3, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 63.3, + "end": 63.32, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.32, + "end": 63.34, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 63.34, + "end": 63.36, + "confidence": 0.993 + }, + { + "text": "got", + "start": 63.36, + "end": 63.38, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 63.38, + "end": 
63.4, + "confidence": 0.993 + }, + { + "text": "got", + "start": 63.4, + "end": 63.46, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 63.46, + "end": 63.64, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.64, + "end": 63.68, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 63.68, + "end": 63.7, + "confidence": 0.993 + }, + { + "text": "got", + "start": 63.7, + "end": 63.72, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 63.72, + "end": 63.74, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.74, + "end": 63.76, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 63.76, + "end": 64.42, + "confidence": 0.992 + }, + { + "text": "got", + "start": 64.46, + "end": 65.06, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 65.06, + "end": 65.22, + "confidence": 0.992 + }, + { + "text": "got", + "start": 65.22, + "end": 65.24, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 65.24, + "end": 65.26, + "confidence": 0.992 + }, + { + "text": "got", + "start": 65.26, + "end": 66.0, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 66.0, + "end": 66.2, + "confidence": 0.992 + }, + { + "text": "got", + "start": 66.38, + "end": 67.18, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 67.18, + "end": 67.2, + "confidence": 0.992 + }, + { + "text": "got", + "start": 67.2, + "end": 67.34, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 67.34, + "end": 67.44, + "confidence": 0.993 + }, + { + "text": "got", + "start": 67.48, + "end": 67.5, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 67.5, + "end": 67.52, + "confidence": 0.992 + }, + { + "text": "got", + "start": 67.52, + "end": 67.54, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 67.54, + "end": 67.56, + "confidence": 0.993 + }, + { + "text": "got", + "start": 67.56, + "end": 67.58, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 67.58, + "end": 67.6, + "confidence": 
0.992 + }, + { + "text": "got", + "start": 67.6, + "end": 67.62, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 67.62, + "end": 67.64, + "confidence": 0.992 + }, + { + "text": "got", + "start": 67.64, + "end": 67.66, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 67.66, + "end": 67.68, + "confidence": 0.993 + }, + { + "text": "got", + "start": 67.68, + "end": 67.7, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 67.7, + "end": 67.72, + "confidence": 0.992 + }, + { + "text": "got", + "start": 67.72, + "end": 67.74, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 67.74, + "end": 67.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 68.12, + "end": 68.36, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 68.36, + "end": 68.38, + "confidence": 0.992 + }, + { + "text": "got", + "start": 68.56, + "end": 68.58, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 68.58, + "end": 68.6, + "confidence": 0.992 + }, + { + "text": "got", + "start": 68.6, + "end": 68.8, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 68.8, + "end": 69.12, + "confidence": 0.992 + }, + { + "text": "got", + "start": 69.26, + "end": 69.28, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 69.28, + "end": 69.44, + "confidence": 0.992 + }, + { + "text": "got", + "start": 69.84, + "end": 69.86, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 69.86, + "end": 70.02, + "confidence": 0.992 + }, + { + "text": "got", + "start": 70.18, + "end": 70.5, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 70.5, + "end": 71.42, + "confidence": 0.992 + }, + { + "text": "got", + "start": 71.56, + "end": 71.58, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 71.58, + "end": 71.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 71.94, + "end": 72.1, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 72.1, + "end": 72.82, + "confidence": 0.992 + }, + { + 
"text": "got", + "start": 72.82, + "end": 74.48, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 74.48, + "end": 74.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 74.74, + "end": 74.88, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 74.88, + "end": 74.9, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.12, + "end": 75.14, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 75.14, + "end": 75.24, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.24, + "end": 75.78, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 75.78, + "end": 76.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.02, + "end": 76.4, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 76.4, + "end": 76.58, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.58, + "end": 76.82, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 76.82, + "end": 77.06, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.06, + "end": 77.2, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.2, + "end": 77.76, + "confidence": 0.993 + }, + { + "text": "got", + "start": 78.22, + "end": 78.24, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 78.24, + "end": 78.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 80.0, + "end": 80.02, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 80.02, + "end": 82.6, + "confidence": 0.994 + }, + { + "text": "got", + "start": 82.6, + "end": 84.98, + "confidence": 0.981 + }, + { + "text": "them", + "start": 84.98, + "end": 85.0, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.0, + "end": 115.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got 
them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + 
"avg_logprob": -0.04266870609847954, + "compression_ratio": 29.52, + "no_speech_prob": 0.6326744556427002, + "confidence": 0.953, + "words": [ + { + "text": "got", + "start": 85.0, + "end": 85.4, + "confidence": 0.454 + }, + { + "text": "them,", + "start": 85.4, + "end": 85.58, + "confidence": 0.95 + }, + { + "text": "got", + "start": 86.12, + "end": 86.72, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 86.72, + "end": 87.12, + "confidence": 0.992 + }, + { + "text": "got", + "start": 87.24, + "end": 87.56, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 87.56, + "end": 87.94, + "confidence": 0.994 + }, + { + "text": "got", + "start": 88.08, + "end": 88.48, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 88.48, + "end": 88.5, + "confidence": 0.992 + }, + { + "text": "got", + "start": 88.5, + "end": 88.9, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 88.9, + "end": 89.02, + "confidence": 0.99 + }, + { + "text": "got", + "start": 89.06, + "end": 89.08, + "confidence": 0.921 + }, + { + "text": "them,", + "start": 89.08, + "end": 89.1, + "confidence": 0.989 + }, + { + "text": "got", + "start": 89.1, + "end": 89.12, + "confidence": 0.908 + }, + { + "text": "them,", + "start": 89.12, + "end": 89.14, + "confidence": 0.988 + }, + { + "text": "got", + "start": 89.58, + "end": 89.6, + "confidence": 0.908 + }, + { + "text": "them,", + "start": 89.6, + "end": 89.62, + "confidence": 0.986 + }, + { + "text": "got", + "start": 90.06, + "end": 90.08, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 90.08, + "end": 90.1, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.68, + "end": 90.7, + "confidence": 0.91 + }, + { + "text": "them,", + "start": 90.7, + "end": 90.72, + "confidence": 0.972 + }, + { + "text": "got", + "start": 91.88, + "end": 91.9, + "confidence": 0.855 + }, + { + "text": "them,", + "start": 91.9, + "end": 91.92, + "confidence": 0.965 + }, + { + "text": "got", + "start": 91.92, + "end": 
91.94, + "confidence": 0.831 + }, + { + "text": "them,", + "start": 91.94, + "end": 91.96, + "confidence": 0.968 + }, + { + "text": "got", + "start": 91.96, + "end": 91.98, + "confidence": 0.856 + }, + { + "text": "them,", + "start": 91.98, + "end": 92.0, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.0, + "end": 92.02, + "confidence": 0.879 + }, + { + "text": "them,", + "start": 92.02, + "end": 92.04, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.04, + "end": 92.06, + "confidence": 0.89 + }, + { + "text": "them,", + "start": 92.06, + "end": 92.08, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.08, + "end": 92.1, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 92.1, + "end": 92.12, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.12, + "end": 92.14, + "confidence": 0.885 + }, + { + "text": "them,", + "start": 92.14, + "end": 92.16, + "confidence": 0.972 + }, + { + "text": "got", + "start": 92.16, + "end": 92.18, + "confidence": 0.882 + }, + { + "text": "them,", + "start": 92.18, + "end": 92.2, + "confidence": 0.972 + }, + { + "text": "got", + "start": 92.2, + "end": 92.22, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 92.22, + "end": 92.24, + "confidence": 0.971 + }, + { + "text": "got", + "start": 92.24, + "end": 92.26, + "confidence": 0.878 + }, + { + "text": "them,", + "start": 92.26, + "end": 92.28, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.28, + "end": 92.3, + "confidence": 0.879 + }, + { + "text": "them,", + "start": 92.3, + "end": 92.32, + "confidence": 0.971 + }, + { + "text": "got", + "start": 92.32, + "end": 92.34, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 92.34, + "end": 92.36, + "confidence": 0.971 + }, + { + "text": "got", + "start": 92.36, + "end": 92.38, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 92.38, + "end": 92.4, + "confidence": 0.973 + }, + { + "text": "got", + "start": 92.4, + "end": 92.42, + "confidence": 
0.882 + }, + { + "text": "them,", + "start": 92.42, + "end": 92.44, + "confidence": 0.973 + }, + { + "text": "got", + "start": 92.44, + "end": 92.46, + "confidence": 0.882 + }, + { + "text": "them,", + "start": 92.46, + "end": 92.48, + "confidence": 0.975 + }, + { + "text": "got", + "start": 92.48, + "end": 92.5, + "confidence": 0.885 + }, + { + "text": "them,", + "start": 92.5, + "end": 92.52, + "confidence": 0.976 + }, + { + "text": "got", + "start": 92.52, + "end": 92.54, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 92.54, + "end": 92.56, + "confidence": 0.977 + }, + { + "text": "got", + "start": 92.56, + "end": 92.58, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 92.58, + "end": 92.6, + "confidence": 0.978 + }, + { + "text": "got", + "start": 92.6, + "end": 92.62, + "confidence": 0.893 + }, + { + "text": "them,", + "start": 92.62, + "end": 92.64, + "confidence": 0.979 + }, + { + "text": "got", + "start": 92.64, + "end": 92.66, + "confidence": 0.897 + }, + { + "text": "them,", + "start": 92.66, + "end": 92.68, + "confidence": 0.98 + }, + { + "text": "got", + "start": 92.68, + "end": 92.7, + "confidence": 0.896 + }, + { + "text": "them,", + "start": 92.7, + "end": 92.72, + "confidence": 0.98 + }, + { + "text": "got", + "start": 92.72, + "end": 92.74, + "confidence": 0.897 + }, + { + "text": "them,", + "start": 92.74, + "end": 92.76, + "confidence": 0.981 + }, + { + "text": "got", + "start": 92.76, + "end": 92.78, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 92.78, + "end": 92.8, + "confidence": 0.982 + }, + { + "text": "got", + "start": 92.8, + "end": 92.82, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 92.82, + "end": 92.84, + "confidence": 0.983 + }, + { + "text": "got", + "start": 92.84, + "end": 92.86, + "confidence": 0.911 + }, + { + "text": "them,", + "start": 92.86, + "end": 92.88, + "confidence": 0.984 + }, + { + "text": "got", + "start": 92.88, + "end": 92.9, + "confidence": 0.916 + }, + { + 
"text": "them,", + "start": 92.9, + "end": 92.92, + "confidence": 0.984 + }, + { + "text": "got", + "start": 92.92, + "end": 92.94, + "confidence": 0.917 + }, + { + "text": "them,", + "start": 92.94, + "end": 92.96, + "confidence": 0.985 + }, + { + "text": "got", + "start": 92.96, + "end": 92.98, + "confidence": 0.925 + }, + { + "text": "them,", + "start": 92.98, + "end": 93.0, + "confidence": 0.986 + }, + { + "text": "got", + "start": 93.26, + "end": 93.28, + "confidence": 0.929 + }, + { + "text": "them,", + "start": 93.28, + "end": 93.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 93.3, + "end": 93.32, + "confidence": 0.931 + }, + { + "text": "them,", + "start": 93.32, + "end": 93.34, + "confidence": 0.988 + }, + { + "text": "got", + "start": 93.34, + "end": 93.36, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 93.36, + "end": 93.38, + "confidence": 0.988 + }, + { + "text": "got", + "start": 93.38, + "end": 93.4, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 93.4, + "end": 93.42, + "confidence": 0.989 + }, + { + "text": "got", + "start": 94.02, + "end": 94.14, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 94.14, + "end": 94.16, + "confidence": 0.99 + }, + { + "text": "got", + "start": 94.16, + "end": 94.18, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 94.18, + "end": 94.2, + "confidence": 0.99 + }, + { + "text": "got", + "start": 94.2, + "end": 94.22, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 94.22, + "end": 94.24, + "confidence": 0.991 + }, + { + "text": "got", + "start": 94.24, + "end": 94.26, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 94.26, + "end": 94.28, + "confidence": 0.991 + }, + { + "text": "got", + "start": 94.28, + "end": 94.3, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 94.3, + "end": 94.32, + "confidence": 0.992 + }, + { + "text": "got", + "start": 94.32, + "end": 99.5, + "confidence": 0.956 + }, + { + "text": "them,", + 
"start": 99.5, + "end": 99.52, + "confidence": 0.992 + }, + { + "text": "got", + "start": 99.54, + "end": 99.58, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 99.58, + "end": 99.6, + "confidence": 0.993 + }, + { + "text": "got", + "start": 100.66, + "end": 100.68, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 100.68, + "end": 100.72, + "confidence": 0.993 + }, + { + "text": "got", + "start": 100.72, + "end": 100.74, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 100.74, + "end": 100.76, + "confidence": 0.994 + }, + { + "text": "got", + "start": 100.76, + "end": 100.78, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 100.78, + "end": 100.8, + "confidence": 0.994 + }, + { + "text": "got", + "start": 101.04, + "end": 101.06, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 101.06, + "end": 101.44, + "confidence": 0.994 + }, + { + "text": "got", + "start": 101.44, + "end": 101.46, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 101.46, + "end": 101.48, + "confidence": 0.994 + }, + { + "text": "got", + "start": 101.48, + "end": 101.5, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 101.5, + "end": 101.52, + "confidence": 0.995 + }, + { + "text": "got", + "start": 101.52, + "end": 101.54, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 101.54, + "end": 101.56, + "confidence": 0.995 + }, + { + "text": "got", + "start": 101.64, + "end": 101.66, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 101.66, + "end": 101.88, + "confidence": 0.995 + }, + { + "text": "got", + "start": 101.88, + "end": 101.9, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 101.9, + "end": 101.92, + "confidence": 0.995 + }, + { + "text": "got", + "start": 101.92, + "end": 101.94, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 101.94, + "end": 101.96, + "confidence": 0.996 + }, + { + "text": "got", + "start": 101.96, + "end": 101.98, + "confidence": 0.975 + 
}, + { + "text": "them,", + "start": 101.98, + "end": 102.0, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.0, + "end": 102.02, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 102.02, + "end": 102.04, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.04, + "end": 102.06, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 102.06, + "end": 102.08, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.08, + "end": 102.1, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 102.1, + "end": 102.12, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.12, + "end": 102.14, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 102.14, + "end": 102.18, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.18, + "end": 102.2, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 102.2, + "end": 102.22, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.22, + "end": 102.24, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 102.24, + "end": 102.26, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.26, + "end": 102.28, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 102.28, + "end": 102.3, + "confidence": 0.997 + }, + { + "text": "got", + "start": 102.3, + "end": 102.32, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 102.32, + "end": 102.34, + "confidence": 0.997 + }, + { + "text": "got", + "start": 102.34, + "end": 102.36, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 102.36, + "end": 104.0, + "confidence": 0.997 + }, + { + "text": "got", + "start": 104.0, + "end": 104.02, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 104.02, + "end": 105.4, + "confidence": 0.997 + }, + { + "text": "got", + "start": 105.4, + "end": 105.42, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 105.42, + "end": 110.26, + "confidence": 0.997 + }, + { + "text": "got", + "start": 110.26, + "end": 
110.28, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 110.28, + "end": 111.3, + "confidence": 0.997 + }, + { + "text": "got", + "start": 111.3, + "end": 111.32, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 111.32, + "end": 114.74, + "confidence": 0.997 + }, + { + "text": "got", + "start": 114.74, + "end": 114.98, + "confidence": 0.985 + }, + { + "text": "them", + "start": 114.98, + "end": 115.0, + "confidence": 0.997 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/medium_auto.cpu/empty.mp3.words.json b/tests/expected/medium_auto.cpu/empty.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..4343ff36131a9ff2cd0f90ee1eb0dcace7c2c1e4 --- /dev/null +++ b/tests/expected/medium_auto.cpu/empty.mp3.words.json @@ -0,0 +1,46 @@ +{ + "text": " Thanks for watching!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.02, + "end": 3.06, + "text": " Thanks for watching!", + "tokens": [ + 50364, + 2561, + 337, + 1976, + 0, + 50514 + ], + "temperature": 0.0, + "avg_logprob": -0.8994035720825195, + "compression_ratio": 0.7142857142857143, + "no_speech_prob": 0.6661779880523682, + "confidence": 0.373, + "words": [ + { + "text": "Thanks", + "start": 0.02, + "end": 0.44, + "confidence": 0.06 + }, + { + "text": "for", + "start": 0.44, + "end": 1.02, + "confidence": 0.916 + }, + { + "text": "watching!", + "start": 1.02, + "end": 3.06, + "confidence": 0.936 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/medium_auto.cpu/radio_short.mp3.words.json b/tests/expected/medium_auto.cpu/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b5ad9acab69d8c24acd10bd904b99b423c6c97ec --- /dev/null +++ b/tests/expected/medium_auto.cpu/radio_short.mp3.words.json @@ -0,0 +1,1616 @@ +{ + "text": "3212122222222211111111111111111111111111111111111111111111111111111111", + "segments": [ + { + "id": 0, + 
"seek": 0, + "start": 0.04, + "end": 0.64, + "text": "3", + "tokens": [ + 18 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.029, + "words": [ + { + "text": "3", + "start": 0.04, + "end": 0.64, + "confidence": 0.029 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.52, + "end": 3.0, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.688, + "words": [ + { + "text": "2", + "start": 1.52, + "end": 3.0, + "confidence": 0.688 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 3.68, + "end": 5.32, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.95, + "words": [ + { + "text": "1", + "start": 3.68, + "end": 5.32, + "confidence": 0.95 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 6.5, + "end": 7.02, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.278, + "words": [ + { + "text": "2", + "start": 6.5, + "end": 7.02, + "confidence": 0.278 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 8.08, + "end": 9.68, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.702, + "words": [ + { + "text": "1", + "start": 8.08, + "end": 9.68, + "confidence": 0.702 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 10.38, + "end": 11.04, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.919, + "words": [ + { + 
"text": "2", + "start": 10.38, + "end": 11.04, + "confidence": 0.919 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 11.54, + "end": 13.04, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.657, + "words": [ + { + "text": "2", + "start": 11.54, + "end": 13.04, + "confidence": 0.657 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.52, + "end": 15.57, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.772, + "words": [ + { + "text": "2", + "start": 13.52, + "end": 15.57, + "confidence": 0.772 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.57, + "end": 16.98, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.905, + "words": [ + { + "text": "2", + "start": 15.57, + "end": 16.98, + "confidence": 0.905 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 17.52, + "end": 19.22, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.808, + "words": [ + { + "text": "2", + "start": 17.52, + "end": 19.22, + "confidence": 0.808 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 19.62, + "end": 20.68, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.626, + "words": [ + { + "text": "2", + "start": 19.62, + "end": 20.68, + "confidence": 0.626 + } + ] + }, + { + "id": 11, + "seek": 0, + "start": 22.22, + "end": 22.86, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": 
-0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.835, + "words": [ + { + "text": "2", + "start": 22.22, + "end": 22.86, + "confidence": 0.835 + } + ] + }, + { + "id": 12, + "seek": 0, + "start": 23.52, + "end": 24.68, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.974, + "words": [ + { + "text": "2", + "start": 23.52, + "end": 24.68, + "confidence": 0.974 + } + ] + }, + { + "id": 13, + "seek": 0, + "start": 25.7, + "end": 27.1, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.97, + "words": [ + { + "text": "2", + "start": 25.7, + "end": 27.1, + "confidence": 0.97 + } + ] + }, + { + "id": 14, + "seek": 2800, + "start": 28.02, + "end": 28.86, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.271, + "words": [ + { + "text": "1", + "start": 28.02, + "end": 28.86, + "confidence": 0.271 + } + ] + }, + { + "id": 15, + "seek": 2800, + "start": 30.5, + "end": 31.12, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.504, + "words": [ + { + "text": "1", + "start": 30.5, + "end": 31.12, + "confidence": 0.504 + } + ] + }, + { + "id": 16, + "seek": 2800, + "start": 31.52, + "end": 33.55, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.911, + "words": [ + { + "text": "1", + "start": 31.52, + "end": 
33.55, + "confidence": 0.911 + } + ] + }, + { + "id": 17, + "seek": 2800, + "start": 33.55, + "end": 35.02, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.894, + "words": [ + { + "text": "1", + "start": 33.55, + "end": 35.02, + "confidence": 0.894 + } + ] + }, + { + "id": 18, + "seek": 2800, + "start": 36.24, + "end": 37.52, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 36.24, + "end": 37.52, + "confidence": 0.957 + } + ] + }, + { + "id": 19, + "seek": 2800, + "start": 37.68, + "end": 39.8, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.958, + "words": [ + { + "text": "1", + "start": 37.68, + "end": 39.8, + "confidence": 0.958 + } + ] + }, + { + "id": 20, + "seek": 2800, + "start": 39.8, + "end": 41.2, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.967, + "words": [ + { + "text": "1", + "start": 39.8, + "end": 41.2, + "confidence": 0.967 + } + ] + }, + { + "id": 21, + "seek": 2800, + "start": 41.52, + "end": 44.02, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.973, + "words": [ + { + "text": "1", + "start": 41.52, + "end": 44.02, + "confidence": 0.973 + } + ] + }, + { + "id": 22, + "seek": 2800, + "start": 44.02, + "end": 45.04, + "text": "1", + 
"tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.944, + "words": [ + { + "text": "1", + "start": 44.02, + "end": 45.04, + "confidence": 0.944 + } + ] + }, + { + "id": 23, + "seek": 2800, + "start": 45.52, + "end": 47.53, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.959, + "words": [ + { + "text": "1", + "start": 45.52, + "end": 47.53, + "confidence": 0.959 + } + ] + }, + { + "id": 24, + "seek": 2800, + "start": 47.53, + "end": 48.94, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.966, + "words": [ + { + "text": "1", + "start": 47.53, + "end": 48.94, + "confidence": 0.966 + } + ] + }, + { + "id": 25, + "seek": 2800, + "start": 50.2, + "end": 52.06, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.968, + "words": [ + { + "text": "1", + "start": 50.2, + "end": 52.06, + "confidence": 0.968 + } + ] + }, + { + "id": 26, + "seek": 2800, + "start": 52.32, + "end": 52.84, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.964, + "words": [ + { + "text": "1", + "start": 52.32, + "end": 52.84, + "confidence": 0.964 + } + ] + }, + { + "id": 27, + "seek": 2800, + "start": 53.64, + "end": 54.9, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + 
"no_speech_prob": 0.022917350754141808, + "confidence": 0.944, + "words": [ + { + "text": "1", + "start": 53.64, + "end": 54.9, + "confidence": 0.944 + } + ] + }, + { + "id": 28, + "seek": 5600, + "start": 56.02, + "end": 57.06, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.377, + "words": [ + { + "text": "1", + "start": 56.02, + "end": 57.06, + "confidence": 0.377 + } + ] + }, + { + "id": 29, + "seek": 5600, + "start": 57.52, + "end": 58.8, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.763, + "words": [ + { + "text": "1", + "start": 57.52, + "end": 58.8, + "confidence": 0.763 + } + ] + }, + { + "id": 30, + "seek": 5600, + "start": 59.8, + "end": 61.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.952, + "words": [ + { + "text": "1", + "start": 59.8, + "end": 61.08, + "confidence": 0.952 + } + ] + }, + { + "id": 31, + "seek": 5600, + "start": 61.52, + "end": 63.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.949, + "words": [ + { + "text": "1", + "start": 61.52, + "end": 63.08, + "confidence": 0.949 + } + ] + }, + { + "id": 32, + "seek": 5600, + "start": 64.04, + "end": 65.18, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.954, + "words": [ + { + "text": "1", + "start": 64.04, + "end": 65.18, 
+ "confidence": 0.954 + } + ] + }, + { + "id": 33, + "seek": 5600, + "start": 65.52, + "end": 66.7, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.952, + "words": [ + { + "text": "1", + "start": 65.52, + "end": 66.7, + "confidence": 0.952 + } + ] + }, + { + "id": 34, + "seek": 5600, + "start": 67.52, + "end": 69.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.966, + "words": [ + { + "text": "1", + "start": 67.52, + "end": 69.08, + "confidence": 0.966 + } + ] + }, + { + "id": 35, + "seek": 5600, + "start": 69.58, + "end": 71.65, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.958, + "words": [ + { + "text": "1", + "start": 69.58, + "end": 71.65, + "confidence": 0.958 + } + ] + }, + { + "id": 36, + "seek": 5600, + "start": 71.65, + "end": 73.62, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 71.65, + "end": 73.62, + "confidence": 0.957 + } + ] + }, + { + "id": 37, + "seek": 5600, + "start": 73.62, + "end": 75.85, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.953, + "words": [ + { + "text": "1", + "start": 73.62, + "end": 75.85, + "confidence": 0.953 + } + ] + }, + { + "id": 38, + "seek": 5600, + "start": 75.85, + "end": 77.12, + "text": "1", + 
"tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.942, + "words": [ + { + "text": "1", + "start": 75.85, + "end": 77.12, + "confidence": 0.942 + } + ] + }, + { + "id": 39, + "seek": 5600, + "start": 78.22, + "end": 78.78, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.936, + "words": [ + { + "text": "1", + "start": 78.22, + "end": 78.78, + "confidence": 0.936 + } + ] + }, + { + "id": 40, + "seek": 5600, + "start": 80.08, + "end": 80.86, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.929, + "words": [ + { + "text": "1", + "start": 80.08, + "end": 80.86, + "confidence": 0.929 + } + ] + }, + { + "id": 41, + "seek": 5600, + "start": 81.52, + "end": 82.7, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.903, + "words": [ + { + "text": "1", + "start": 81.52, + "end": 82.7, + "confidence": 0.903 + } + ] + }, + { + "id": 42, + "seek": 8400, + "start": 84.88, + "end": 85.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.82, + "words": [ + { + "text": "1", + "start": 84.88, + "end": 85.08, + "confidence": 0.82 + } + ] + }, + { + "id": 43, + "seek": 8400, + "start": 85.58, + "end": 86.86, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + 
"no_speech_prob": 0.02908501960337162, + "confidence": 0.884, + "words": [ + { + "text": "1", + "start": 85.58, + "end": 86.86, + "confidence": 0.884 + } + ] + }, + { + "id": 44, + "seek": 8400, + "start": 88.02, + "end": 89.2, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 88.02, + "end": 89.2, + "confidence": 0.957 + } + ] + }, + { + "id": 45, + "seek": 8400, + "start": 89.52, + "end": 90.98, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.952, + "words": [ + { + "text": "1", + "start": 89.52, + "end": 90.98, + "confidence": 0.952 + } + ] + }, + { + "id": 46, + "seek": 8400, + "start": 92.5, + "end": 93.04, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.917, + "words": [ + { + "text": "1", + "start": 92.5, + "end": 93.04, + "confidence": 0.917 + } + ] + }, + { + "id": 47, + "seek": 8400, + "start": 93.58, + "end": 95.56, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.947, + "words": [ + { + "text": "1", + "start": 93.58, + "end": 95.56, + "confidence": 0.947 + } + ] + }, + { + "id": 48, + "seek": 8400, + "start": 95.6, + "end": 97.95, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.97, + "words": [ + { + "text": "1", + "start": 95.6, + "end": 97.95, + 
"confidence": 0.97 + } + ] + }, + { + "id": 49, + "seek": 8400, + "start": 97.95, + "end": 99.14, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.976, + "words": [ + { + "text": "1", + "start": 97.95, + "end": 99.14, + "confidence": 0.976 + } + ] + }, + { + "id": 50, + "seek": 8400, + "start": 99.52, + "end": 101.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.984, + "words": [ + { + "text": "1", + "start": 99.52, + "end": 101.08, + "confidence": 0.984 + } + ] + }, + { + "id": 51, + "seek": 8400, + "start": 101.52, + "end": 103.62, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.985, + "words": [ + { + "text": "1", + "start": 101.52, + "end": 103.62, + "confidence": 0.985 + } + ] + }, + { + "id": 52, + "seek": 8400, + "start": 103.62, + "end": 105.7, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.988, + "words": [ + { + "text": "1", + "start": 103.62, + "end": 105.7, + "confidence": 0.988 + } + ] + }, + { + "id": 53, + "seek": 8400, + "start": 106.5, + "end": 107.12, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 106.5, + "end": 107.12, + "confidence": 0.957 + } + ] + }, + { + "id": 54, + "seek": 8400, + "start": 108.5, + "end": 109.22, + "text": "1", + 
"tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.946, + "words": [ + { + "text": "1", + "start": 108.5, + "end": 109.22, + "confidence": 0.946 + } + ] + }, + { + "id": 55, + "seek": 8400, + "start": 109.52, + "end": 111.36, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.89, + "words": [ + { + "text": "1", + "start": 109.52, + "end": 111.36, + "confidence": 0.89 + } + ] + }, + { + "id": 56, + "seek": 11200, + "start": 112.98, + "end": 113.34, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.895, + "words": [ + { + "text": "1", + "start": 112.98, + "end": 113.34, + "confidence": 0.895 + } + ] + }, + { + "id": 57, + "seek": 11200, + "start": 113.88, + "end": 114.52, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.871, + "words": [ + { + "text": "1", + "start": 113.88, + "end": 114.52, + "confidence": 0.871 + } + ] + }, + { + "id": 58, + "seek": 11200, + "start": 115.94, + "end": 117.2, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.977, + "words": [ + { + "text": "1", + "start": 115.94, + "end": 117.2, + "confidence": 0.977 + } + ] + }, + { + "id": 59, + "seek": 11200, + "start": 117.52, + "end": 118.64, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 
1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.975, + "words": [ + { + "text": "1", + "start": 117.52, + "end": 118.64, + "confidence": 0.975 + } + ] + }, + { + "id": 60, + "seek": 11200, + "start": 120.5, + "end": 120.8, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 120.5, + "end": 120.8, + "confidence": 0.957 + } + ] + }, + { + "id": 61, + "seek": 11200, + "start": 121.58, + "end": 123.74, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.959, + "words": [ + { + "text": "1", + "start": 121.58, + "end": 123.74, + "confidence": 0.959 + } + ] + }, + { + "id": 62, + "seek": 11200, + "start": 123.9, + "end": 125.56, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.967, + "words": [ + { + "text": "1", + "start": 123.9, + "end": 125.56, + "confidence": 0.967 + } + ] + }, + { + "id": 63, + "seek": 11200, + "start": 125.56, + "end": 127.34, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.968, + "words": [ + { + "text": "1", + "start": 125.56, + "end": 127.34, + "confidence": 0.968 + } + ] + }, + { + "id": 64, + "seek": 11200, + "start": 127.52, + "end": 129.2, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.96, + "words": [ + { + "text": 
"1", + "start": 127.52, + "end": 129.2, + "confidence": 0.96 + } + ] + }, + { + "id": 65, + "seek": 11200, + "start": 129.52, + "end": 131.06, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.965, + "words": [ + { + "text": "1", + "start": 129.52, + "end": 131.06, + "confidence": 0.965 + } + ] + }, + { + "id": 66, + "seek": 11200, + "start": 131.52, + "end": 133.3, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.969, + "words": [ + { + "text": "1", + "start": 131.52, + "end": 133.3, + "confidence": 0.969 + } + ] + }, + { + "id": 67, + "seek": 11200, + "start": 133.52, + "end": 135.73, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.966, + "words": [ + { + "text": "1", + "start": 133.52, + "end": 135.73, + "confidence": 0.966 + } + ] + }, + { + "id": 68, + "seek": 11200, + "start": 135.73, + "end": 137.34, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.971, + "words": [ + { + "text": "1", + "start": 135.73, + "end": 137.34, + "confidence": 0.971 + } + ] + }, + { + "id": 69, + "seek": 11200, + "start": 137.52, + "end": 139.8, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.925, + "words": [ + { + "text": "1", + "start": 137.52, + "end": 139.8, + "confidence": 0.925 + } + ] + } + ], + "language": "zh" +} \ No 
newline at end of file diff --git a/tests/expected/medium_auto.cpu/smartphone.mp3.words.json b/tests/expected/medium_auto.cpu/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..9faa961d5cf4a38b6ac04b76010aeaddb28cee89 --- /dev/null +++ b/tests/expected/medium_auto.cpu/smartphone.mp3.words.json @@ -0,0 +1,4802 @@ +{ + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions, mais la manière dont elles interagissent entre elles. Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces. L'écran tactile a été beaucoup très souvent mentionné. Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes. Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible. Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but. Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité. Mais ça, ça soulève une autre interrogation. Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit? Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone? Il n'y a pas d'équivalent en fait. Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant. Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet, d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet. 
Donc, à objet inédit, rapport inédit. Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment. Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre. On peut adorer sa bagnole, en avoir besoin pour plein de choses. Et bien, le soir, quand on va se coucher, on la laisse. On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes. On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui, continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate. Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi. Donc, rapport inédit. D'accord. Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais? Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux? Les économistes parlent de dépendance du sentier. 
C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 3.62, + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça.", + "tokens": [ + 383, + 6, + 377, + 20090, + 1078, + 1769, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 408, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.935, + "words": [ + { + "text": "C'est", + "start": 0.38, + "end": 0.58, + "confidence": 0.961 + }, + { + "text": "évident", + "start": 0.58, + "end": 0.88, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 0.88, + "end": 1.02, + "confidence": 0.663 + }, + { + "text": "que", + "start": 1.02, + "end": 1.08, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.994 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.78, + "confidence": 0.91 + }, + { + "text": "mais", + "start": 1.78, + "end": 1.9, + "confidence": 0.979 + }, + { + "text": "je", + "start": 1.9, + "end": 2.24, + "confidence": 0.982 + }, + { + "text": "ne", + "start": 2.24, + "end": 2.34, + "confidence": 0.835 + }, + { + "text": "me", + "start": 2.34, + "end": 2.38, + "confidence": 0.82 + }, + { + "text": "l'étais", + "start": 2.38, + "end": 2.58, + "confidence": 0.971 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.84, + "confidence": 0.989 + }, + { + "text": "formulé", + "start": 2.84, + "end": 3.26, + "confidence": 0.909 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.42, + "confidence": 0.993 + }, + { + "text": "ça.", + "start": 3.42, + "end": 3.62, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.08, + "end": 7.92, + "text": " 
Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions,", + "tokens": [ + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 287, + 6, + 8476, + 449, + 2776, + 730, + 17290, + 3916, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.93, + "words": [ + { + "text": "Ce", + "start": 4.08, + "end": 4.26, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 4.26, + "end": 4.34, + "confidence": 0.958 + }, + { + "text": "fait", + "start": 4.34, + "end": 4.48, + "confidence": 0.565 + }, + { + "text": "la", + "start": 4.48, + "end": 4.66, + "confidence": 0.971 + }, + { + "text": "force", + "start": 4.66, + "end": 5.0, + "confidence": 0.999 + }, + { + "text": "du", + "start": 5.0, + "end": 5.2, + "confidence": 0.996 + }, + { + "text": "smartphone,", + "start": 5.2, + "end": 5.88, + "confidence": 0.911 + }, + { + "text": "c'est", + "start": 5.88, + "end": 6.12, + "confidence": 0.88 + }, + { + "text": "pas", + "start": 6.12, + "end": 6.26, + "confidence": 0.992 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.52, + "confidence": 0.999 + }, + { + "text": "l'accumulation", + "start": 6.52, + "end": 7.38, + "confidence": 0.958 + }, + { + "text": "des", + "start": 7.38, + "end": 7.56, + "confidence": 0.983 + }, + { + "text": "fonctions,", + "start": 7.56, + "end": 7.92, + "confidence": 0.987 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.32, + "end": 10.88, + "text": " mais la manière dont elles interagissent entre elles.", + "tokens": [ + 2420, + 635, + 22267, + 9400, + 23576, + 728, + 559, + 25450, + 3962, + 23576, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.977, + "words": [ + { + "text": "mais", + "start": 8.32, + "end": 8.44, + 
"confidence": 0.992 + }, + { + "text": "la", + "start": 8.44, + "end": 8.6, + "confidence": 0.995 + }, + { + "text": "manière", + "start": 8.6, + "end": 8.9, + "confidence": 0.999 + }, + { + "text": "dont", + "start": 8.9, + "end": 9.1, + "confidence": 0.978 + }, + { + "text": "elles", + "start": 9.1, + "end": 9.48, + "confidence": 0.967 + }, + { + "text": "interagissent", + "start": 9.48, + "end": 10.32, + "confidence": 0.964 + }, + { + "text": "entre", + "start": 10.32, + "end": 10.58, + "confidence": 0.956 + }, + { + "text": "elles.", + "start": 10.58, + "end": 10.88, + "confidence": 0.99 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.96, + "end": 13.0, + "text": " Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant.", + "tokens": [ + 8257, + 1956, + 6176, + 274, + 6, + 19400, + 1022, + 635, + 5052, + 11, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.906, + "words": [ + { + "text": "Ce", + "start": 10.96, + "end": 11.16, + "confidence": 0.607 + }, + { + "text": "qui", + "start": 11.16, + "end": 11.22, + "confidence": 0.765 + }, + { + "text": "dit", + "start": 11.22, + "end": 11.4, + "confidence": 0.983 + }, + { + "text": "d'ailleurs", + "start": 11.4, + "end": 11.56, + "confidence": 0.985 + }, + { + "text": "sur", + "start": 11.56, + "end": 11.72, + "confidence": 0.477 + }, + { + "text": "la", + "start": 11.72, + "end": 11.78, + "confidence": 0.984 + }, + { + "text": "photo,", + "start": 11.78, + "end": 12.12, + "confidence": 0.994 + }, + { + "text": "c'est", + "start": 12.12, + "end": 12.2, + "confidence": 0.997 + }, + { + "text": "hyper", + "start": 12.2, + "end": 12.42, + "confidence": 0.993 + }, + { + "text": "convaincant.", + "start": 12.42, + "end": 13.0, + "confidence": 0.982 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.34, + "end": 16.02, + "text": " 
Alors évidemment, il faudrait ajouter les interfaces.", + "tokens": [ + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.913, + "words": [ + { + "text": "Alors", + "start": 13.34, + "end": 13.62, + "confidence": 0.585 + }, + { + "text": "évidemment,", + "start": 13.62, + "end": 14.34, + "confidence": 0.832 + }, + { + "text": "il", + "start": 14.34, + "end": 14.38, + "confidence": 0.952 + }, + { + "text": "faudrait", + "start": 14.38, + "end": 14.74, + "confidence": 0.996 + }, + { + "text": "ajouter", + "start": 14.74, + "end": 15.16, + "confidence": 0.992 + }, + { + "text": "les", + "start": 15.16, + "end": 15.52, + "confidence": 0.985 + }, + { + "text": "interfaces.", + "start": 15.52, + "end": 16.02, + "confidence": 0.984 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.22, + "end": 19.36, + "text": " L'écran tactile a été beaucoup très souvent mentionné.", + "tokens": [ + 441, + 6, + 9062, + 4257, + 47319, + 257, + 8862, + 8796, + 5732, + 20847, + 2152, + 15055, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.923, + "words": [ + { + "text": "L'écran", + "start": 16.22, + "end": 16.7, + "confidence": 0.996 + }, + { + "text": "tactile", + "start": 16.7, + "end": 17.06, + "confidence": 0.986 + }, + { + "text": "a", + "start": 17.06, + "end": 17.26, + "confidence": 0.98 + }, + { + "text": "été", + "start": 17.26, + "end": 17.88, + "confidence": 0.975 + }, + { + "text": "beaucoup", + "start": 17.88, + "end": 18.28, + "confidence": 0.976 + }, + { + "text": "très", + "start": 18.28, + "end": 18.62, + "confidence": 0.447 + }, + { + "text": "souvent", + "start": 18.62, + "end": 18.9, + "confidence": 0.996 + }, + { + "text": "mentionné.", + 
"start": 18.9, + "end": 19.36, + "confidence": 0.978 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 19.84, + "end": 25.26, + "text": " Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes.", + "tokens": [ + 6313, + 4428, + 11, + 1930, + 8487, + 1264, + 421, + 6, + 388, + 1740, + 642, + 6212, + 368, + 945, + 1567, + 17338, + 1512, + 358, + 1625, + 1512, + 4792, + 13923, + 2156, + 4666, + 6592, + 724, + 5714, + 1531, + 596, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.92, + "words": [ + { + "text": "Mais", + "start": 19.84, + "end": 20.22, + "confidence": 0.944 + }, + { + "text": "bon,", + "start": 20.22, + "end": 20.52, + "confidence": 0.667 + }, + { + "text": "il", + "start": 20.52, + "end": 20.6, + "confidence": 0.99 + }, + { + "text": "faut", + "start": 20.6, + "end": 20.7, + "confidence": 0.99 + }, + { + "text": "dire", + "start": 20.7, + "end": 20.84, + "confidence": 0.995 + }, + { + "text": "qu'il", + "start": 20.84, + "end": 20.96, + "confidence": 0.88 + }, + { + "text": "profite", + "start": 20.96, + "end": 21.26, + "confidence": 0.995 + }, + { + "text": "aussi", + "start": 21.26, + "end": 21.68, + "confidence": 0.972 + }, + { + "text": "de", + "start": 21.68, + "end": 21.9, + "confidence": 0.97 + }, + { + "text": "20", + "start": 21.9, + "end": 22.1, + "confidence": 0.812 + }, + { + "text": "ans", + "start": 22.1, + "end": 22.32, + "confidence": 0.997 + }, + { + "text": "pendant", + "start": 22.32, + "end": 22.48, + "confidence": 0.72 + }, + { + "text": "lesquels", + "start": 22.48, + "end": 22.92, + "confidence": 0.98 + }, + { + "text": "les", + "start": 22.92, + "end": 23.04, + "confidence": 0.709 + }, + { + "text": "ordinateurs", + "start": 23.04, + "end": 23.54, + "confidence": 0.965 + }, + { + "text": 
"nous", + "start": 23.54, + "end": 23.72, + "confidence": 0.602 + }, + { + "text": "ont", + "start": 23.72, + "end": 23.82, + "confidence": 0.974 + }, + { + "text": "appris", + "start": 23.82, + "end": 24.1, + "confidence": 0.991 + }, + { + "text": "à", + "start": 24.1, + "end": 24.24, + "confidence": 0.829 + }, + { + "text": "cliquer", + "start": 24.24, + "end": 24.5, + "confidence": 0.989 + }, + { + "text": "sur", + "start": 24.5, + "end": 24.66, + "confidence": 0.984 + }, + { + "text": "des", + "start": 24.66, + "end": 24.94, + "confidence": 0.971 + }, + { + "text": "icônes.", + "start": 24.94, + "end": 25.26, + "confidence": 0.992 + } + ] + }, + { + "id": 7, + "seek": 2534, + "start": 25.42, + "end": 30.64, + "text": " Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible.", + "tokens": [ + 318, + 9507, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1769, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.969, + "words": [ + { + "text": "Sauf", + "start": 25.42, + "end": 25.76, + "confidence": 0.99 + }, + { + "text": "que", + "start": 25.76, + "end": 26.26, + "confidence": 0.996 + }, + { + "text": "le", + "start": 26.26, + "end": 26.66, + "confidence": 0.631 + }, + { + "text": "smartphone", + "start": 26.66, + "end": 27.06, + "confidence": 0.996 + }, + { + "text": "ajoute", + "start": 27.06, + "end": 27.44, + "confidence": 0.991 + }, + { + "text": "le", + "start": 27.44, + "end": 27.62, + "confidence": 0.992 + }, + { + "text": "toucher,", + "start": 27.62, + "end": 28.18, + "confidence": 0.988 + }, + { + "text": "ce", + "start": 28.18, + "end": 28.22, + "confidence": 0.99 + }, + { + "text": "qui", + "start": 28.22, + "end": 28.28, + "confidence": 1.0 + }, + { + "text": "rend", + "start": 28.28, + "end": 28.48, 
+ "confidence": 0.994 + }, + { + "text": "le", + "start": 28.48, + "end": 28.68, + "confidence": 0.993 + }, + { + "text": "contact", + "start": 28.68, + "end": 29.1, + "confidence": 0.999 + }, + { + "text": "plus", + "start": 29.1, + "end": 29.46, + "confidence": 0.985 + }, + { + "text": "direct,", + "start": 29.46, + "end": 30.22, + "confidence": 0.995 + }, + { + "text": "plus", + "start": 30.22, + "end": 30.26, + "confidence": 0.994 + }, + { + "text": "sensible.", + "start": 30.26, + "end": 30.64, + "confidence": 0.997 + } + ] + }, + { + "id": 8, + "seek": 2534, + "start": 31.04, + "end": 37.82, + "text": " Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but.", + "tokens": [ + 3790, + 9093, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 10095, + 602, + 84, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 1609, + 457, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.892, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.22, + "confidence": 0.97 + }, + { + "text": "puis", + "start": 31.22, + "end": 31.36, + "confidence": 0.971 + }, + { + "text": "évidemment,", + "start": 31.36, + "end": 31.7, + "confidence": 0.875 + }, + { + "text": "il", + "start": 31.7, + "end": 31.74, + "confidence": 0.993 + }, + { + "text": "faudrait", + "start": 31.74, + "end": 31.94, + "confidence": 0.995 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.12, + "confidence": 0.84 + }, + { + "text": "aussi", + "start": 32.12, + "end": 32.34, + "confidence": 0.977 + }, + { + "text": "des", + "start": 32.34, + "end": 32.48, + "confidence": 0.994 + }, + { + "text": "applications", + "start": 32.48, + "end": 32.9, + "confidence": 0.993 + }, + { + 
"text": "qui", + "start": 32.9, + "end": 33.18, + "confidence": 0.481 + }, + { + "text": "permettent", + "start": 33.18, + "end": 33.74, + "confidence": 0.992 + }, + { + "text": "de", + "start": 33.74, + "end": 33.96, + "confidence": 0.885 + }, + { + "text": "contourner", + "start": 33.96, + "end": 34.42, + "confidence": 0.958 + }, + { + "text": "le", + "start": 34.42, + "end": 34.52, + "confidence": 0.775 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.8, + "confidence": 0.984 + }, + { + "text": "touffu", + "start": 34.8, + "end": 35.32, + "confidence": 0.741 + }, + { + "text": "de", + "start": 35.32, + "end": 35.72, + "confidence": 0.882 + }, + { + "text": "la", + "start": 35.72, + "end": 35.78, + "confidence": 0.992 + }, + { + "text": "navigation", + "start": 35.78, + "end": 36.24, + "confidence": 0.994 + }, + { + "text": "web", + "start": 36.24, + "end": 36.6, + "confidence": 0.854 + }, + { + "text": "pour", + "start": 36.6, + "end": 36.78, + "confidence": 0.585 + }, + { + "text": "aller", + "start": 36.78, + "end": 36.98, + "confidence": 0.987 + }, + { + "text": "directement", + "start": 36.98, + "end": 37.52, + "confidence": 0.997 + }, + { + "text": "au", + "start": 37.52, + "end": 37.68, + "confidence": 0.967 + }, + { + "text": "but.", + "start": 37.68, + "end": 37.82, + "confidence": 0.995 + } + ] + }, + { + "id": 9, + "seek": 2534, + "start": 37.82, + "end": 46.54, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 8603, + 14964, + 9400, + 38268, + 6176, + 421, + 6, + 388, + 871, + 6070, + 271, + 443, + 5199, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 
1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.974, + "words": [ + { + "text": "Bref,", + "start": 37.82, + "end": 38.76, + "confidence": 0.987 + }, + { + "text": "tout", + "start": 38.76, + "end": 38.98, + "confidence": 0.711 + }, + { + "text": "ça,", + "start": 38.98, + "end": 39.42, + "confidence": 0.995 + }, + { + "text": "ce", + "start": 39.42, + "end": 39.7, + "confidence": 0.993 + }, + { + "text": "sont", + "start": 39.7, + "end": 39.88, + "confidence": 0.999 + }, + { + "text": "les", + "start": 39.88, + "end": 40.16, + "confidence": 0.991 + }, + { + "text": "conditions", + "start": 40.16, + "end": 40.68, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 40.68, + "end": 40.96, + "confidence": 0.997 + }, + { + "text": "permettent", + "start": 40.96, + "end": 41.46, + "confidence": 0.997 + }, + { + "text": "de", + "start": 41.46, + "end": 41.6, + "confidence": 0.998 + }, + { + "text": "créer", + "start": 41.6, + "end": 42.06, + "confidence": 0.998 + }, + { + "text": "cet", + "start": 42.06, + "end": 42.38, + "confidence": 0.998 + }, + { + "text": "objet", + "start": 42.38, + "end": 42.6, + "confidence": 0.994 + }, + { + "text": "dont", + "start": 42.6, + "end": 42.8, + "confidence": 0.779 + }, + { + "text": "Nicolas", + "start": 42.8, + "end": 43.26, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 43.26, + "end": 43.5, + "confidence": 0.986 + }, + { + "text": "qu'il", + "start": 43.5, + "end": 43.7, + "confidence": 0.983 + }, + { + "text": "est", + "start": 43.7, + "end": 43.88, + "confidence": 0.991 + }, + { + "text": "vraisemblablement", + "start": 43.88, + "end": 44.98, + "confidence": 0.991 + }, + { + "text": "inédit", + "start": 44.98, + "end": 45.38, + "confidence": 0.98 + }, + { + "text": "dans", + "start": 45.38, + "end": 45.7, + "confidence": 0.969 + }, + { + "text": "l'histoire", + "start": 45.7, + "end": 45.98, + "confidence": 0.957 + }, + { + "text": "de", + "start": 45.98, + "end": 46.18, + 
"confidence": 0.999 + }, + { + "text": "l'humanité.", + "start": 46.18, + "end": 46.54, + "confidence": 0.992 + } + ] + }, + { + "id": 10, + "seek": 2534, + "start": 46.54, + "end": 48.82, + "text": " Mais ça, ça soulève une autre interrogation.", + "tokens": [ + 6313, + 2788, + 11, + 2788, + 5133, + 31397, + 2251, + 15081, + 24871, + 399, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.969, + "words": [ + { + "text": "Mais", + "start": 46.54, + "end": 47.24, + "confidence": 0.846 + }, + { + "text": "ça,", + "start": 47.24, + "end": 47.72, + "confidence": 0.935 + }, + { + "text": "ça", + "start": 47.72, + "end": 47.76, + "confidence": 0.977 + }, + { + "text": "soulève", + "start": 47.76, + "end": 47.84, + "confidence": 0.993 + }, + { + "text": "une", + "start": 47.84, + "end": 48.02, + "confidence": 0.998 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.26, + "confidence": 0.999 + }, + { + "text": "interrogation.", + "start": 48.26, + "end": 48.82, + "confidence": 0.997 + } + ] + }, + { + "id": 11, + "seek": 4884, + "start": 49.22, + "end": 55.46, + "text": " Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit?", + "tokens": [ + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 8603, + 14964, + 12703, + 294, + 7811, + 270, + 13716, + 270, + 631, + 10349, + 18018, + 1531, + 8783, + 871, + 6212, + 517, + 18018, + 294, + 7811, + 270, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.988, + "words": [ + { + "text": "Est-ce", + "start": 49.22, + "end": 49.62, + "confidence": 0.982 + }, + { + "text": "que", + "start": 49.62, + "end": 49.72, + "confidence": 0.991 + }, + { + "text": "le", + "start": 49.72, + "end": 49.82, + "confidence": 0.993 + }, + { + "text": 
"fait", + "start": 49.82, + "end": 49.98, + "confidence": 0.999 + }, + { + "text": "que", + "start": 49.98, + "end": 50.14, + "confidence": 0.991 + }, + { + "text": "cet", + "start": 50.14, + "end": 50.32, + "confidence": 0.991 + }, + { + "text": "objet", + "start": 50.32, + "end": 50.66, + "confidence": 0.997 + }, + { + "text": "soit", + "start": 50.66, + "end": 51.12, + "confidence": 0.995 + }, + { + "text": "inédit", + "start": 51.12, + "end": 51.8, + "confidence": 0.996 + }, + { + "text": "induit", + "start": 51.8, + "end": 52.32, + "confidence": 0.977 + }, + { + "text": "que", + "start": 52.32, + "end": 52.42, + "confidence": 0.983 + }, + { + "text": "notre", + "start": 52.42, + "end": 52.72, + "confidence": 0.996 + }, + { + "text": "rapport", + "start": 52.72, + "end": 53.28, + "confidence": 0.997 + }, + { + "text": "à", + "start": 53.28, + "end": 53.44, + "confidence": 0.978 + }, + { + "text": "lui", + "start": 53.44, + "end": 53.66, + "confidence": 0.999 + }, + { + "text": "est", + "start": 53.66, + "end": 54.02, + "confidence": 0.916 + }, + { + "text": "aussi", + "start": 54.02, + "end": 54.54, + "confidence": 0.995 + }, + { + "text": "un", + "start": 54.54, + "end": 54.7, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 54.7, + "end": 55.0, + "confidence": 0.996 + }, + { + "text": "inédit?", + "start": 55.0, + "end": 55.46, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 4884, + "start": 55.46, + "end": 63.12, + "text": " Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone?", + "tokens": [ + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 13307, + 871, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 1111, + 25349, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506 + ], + "temperature": 0.0, + 
"avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.965, + "words": [ + { + "text": "Je", + "start": 55.46, + "end": 55.9, + "confidence": 0.88 + }, + { + "text": "veux", + "start": 55.9, + "end": 56.0, + "confidence": 0.988 + }, + { + "text": "dire,", + "start": 56.0, + "end": 56.24, + "confidence": 0.997 + }, + { + "text": "est-ce", + "start": 56.24, + "end": 56.36, + "confidence": 0.99 + }, + { + "text": "que", + "start": 56.36, + "end": 56.42, + "confidence": 0.99 + }, + { + "text": "le", + "start": 56.42, + "end": 56.58, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 56.58, + "end": 56.88, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 56.88, + "end": 57.04, + "confidence": 0.986 + }, + { + "text": "a", + "start": 57.04, + "end": 57.18, + "confidence": 0.989 + }, + { + "text": "au", + "start": 57.18, + "end": 57.28, + "confidence": 0.968 + }, + { + "text": "smartphone", + "start": 57.28, + "end": 57.6, + "confidence": 0.994 + }, + { + "text": "est", + "start": 57.6, + "end": 57.92, + "confidence": 0.95 + }, + { + "text": "comparable", + "start": 57.92, + "end": 58.24, + "confidence": 0.997 + }, + { + "text": "à", + "start": 58.24, + "end": 58.48, + "confidence": 0.949 + }, + { + "text": "celui", + "start": 58.48, + "end": 58.66, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 58.66, + "end": 58.9, + "confidence": 0.989 + }, + { + "text": "entretenait", + "start": 58.9, + "end": 59.32, + "confidence": 0.929 + }, + { + "text": "à", + "start": 59.32, + "end": 59.46, + "confidence": 0.958 + }, + { + "text": "d'autres", + "start": 59.46, + "end": 59.7, + "confidence": 0.997 + }, + { + "text": "objets", + "start": 59.7, + "end": 59.96, + "confidence": 0.991 + }, + { + "text": "techniques", + "start": 59.96, + "end": 60.46, + "confidence": 0.983 + }, + { + "text": "comme", + "start": 60.46, + "end": 60.88, + "confidence": 0.587 + }, + { 
+ "text": "la", + "start": 60.88, + "end": 61.5, + "confidence": 0.987 + }, + { + "text": "voiture", + "start": 61.5, + "end": 62.06, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 62.06, + "end": 62.36, + "confidence": 0.945 + }, + { + "text": "le", + "start": 62.36, + "end": 62.68, + "confidence": 0.998 + }, + { + "text": "téléphone?", + "start": 62.68, + "end": 63.12, + "confidence": 0.999 + } + ] + }, + { + "id": 13, + "seek": 4884, + "start": 63.36, + "end": 66.66, + "text": " Il n'y a pas d'équivalent en fait.", + "tokens": [ + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 465, + 3887, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.936, + "words": [ + { + "text": "Il", + "start": 63.36, + "end": 65.42, + "confidence": 0.779 + }, + { + "text": "n'y", + "start": 65.42, + "end": 65.48, + "confidence": 0.978 + }, + { + "text": "a", + "start": 65.48, + "end": 65.54, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 65.54, + "end": 65.66, + "confidence": 0.999 + }, + { + "text": "d'équivalent", + "start": 65.66, + "end": 66.22, + "confidence": 0.995 + }, + { + "text": "en", + "start": 66.22, + "end": 66.42, + "confidence": 0.601 + }, + { + "text": "fait.", + "start": 66.42, + "end": 66.66, + "confidence": 0.996 + } + ] + }, + { + "id": 14, + "seek": 4884, + "start": 66.88, + "end": 71.52, + "text": " Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant.", + "tokens": [ + 3790, + 5926, + 5550, + 7089, + 30236, + 368, + 11456, + 1375, + 526, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 11, + 269, + 6, + 377, + 7184, + 259, + 394, + 1030, + 7245, + 351, + 5798, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.953, + 
"words": [ + { + "text": "Et", + "start": 66.88, + "end": 66.98, + "confidence": 0.599 + }, + { + "text": "donc", + "start": 66.98, + "end": 67.08, + "confidence": 0.901 + }, + { + "text": "cette", + "start": 67.08, + "end": 67.28, + "confidence": 0.71 + }, + { + "text": "espèce", + "start": 67.28, + "end": 67.54, + "confidence": 0.995 + }, + { + "text": "de", + "start": 67.54, + "end": 67.68, + "confidence": 0.999 + }, + { + "text": "nouveauté", + "start": 67.68, + "end": 68.48, + "confidence": 0.979 + }, + { + "text": "dans", + "start": 68.48, + "end": 68.66, + "confidence": 0.98 + }, + { + "text": "la", + "start": 68.66, + "end": 68.94, + "confidence": 0.995 + }, + { + "text": "relation", + "start": 68.94, + "end": 69.22, + "confidence": 0.998 + }, + { + "text": "à", + "start": 69.22, + "end": 69.38, + "confidence": 0.997 + }, + { + "text": "l'objet,", + "start": 69.38, + "end": 70.24, + "confidence": 0.997 + }, + { + "text": "c'est", + "start": 70.24, + "end": 70.38, + "confidence": 0.98 + }, + { + "text": "fascinant", + "start": 70.38, + "end": 70.64, + "confidence": 0.978 + }, + { + "text": "et", + "start": 70.64, + "end": 70.76, + "confidence": 0.964 + }, + { + "text": "terrifiant.", + "start": 70.76, + "end": 71.52, + "confidence": 0.977 + } + ] + }, + { + "id": 15, + "seek": 4884, + "start": 71.62, + "end": 76.48, + "text": " Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet,", + "tokens": [ + 20429, + 421, + 6, + 266, + 257, + 287, + 6, + 36107, + 11, + 5173, + 476, + 37313, + 1512, + 33643, + 25929, + 1030, + 1512, + 3328, + 11, + 274, + 6, + 9498, + 45768, + 1719, + 368, + 8603, + 14964, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.813, + "words": [ + { + "text": "Parce", + "start": 71.62, + "end": 71.86, + "confidence": 0.512 + }, + { + "text": "qu'on", + "start": 
71.86, + "end": 72.12, + "confidence": 0.937 + }, + { + "text": "a", + "start": 72.12, + "end": 72.44, + "confidence": 0.982 + }, + { + "text": "l'impression,", + "start": 72.44, + "end": 73.56, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 73.56, + "end": 73.84, + "confidence": 0.964 + }, + { + "text": "le", + "start": 73.84, + "end": 74.0, + "confidence": 0.984 + }, + { + "text": "disent", + "start": 74.0, + "end": 74.2, + "confidence": 0.998 + }, + { + "text": "les", + "start": 74.2, + "end": 74.4, + "confidence": 0.994 + }, + { + "text": "utilisateurs", + "start": 74.4, + "end": 74.84, + "confidence": 0.997 + }, + { + "text": "et", + "start": 74.84, + "end": 74.96, + "confidence": 0.329 + }, + { + "text": "les", + "start": 74.96, + "end": 75.0, + "confidence": 0.773 + }, + { + "text": "services,", + "start": 75.0, + "end": 75.22, + "confidence": 0.214 + }, + { + "text": "d'être", + "start": 75.22, + "end": 75.42, + "confidence": 0.766 + }, + { + "text": "dépendants", + "start": 75.42, + "end": 75.96, + "confidence": 0.789 + }, + { + "text": "de", + "start": 75.96, + "end": 76.08, + "confidence": 0.986 + }, + { + "text": "cet", + "start": 76.08, + "end": 76.26, + "confidence": 0.996 + }, + { + "text": "objet,", + "start": 76.26, + "end": 76.48, + "confidence": 0.996 + } + ] + }, + { + "id": 16, + "seek": 7684, + "start": 76.86, + "end": 83.26, + "text": " d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet.", + "tokens": [ + 274, + 6, + 471, + 43612, + 465, + 3887, + 2251, + 7089, + 30236, + 368, + 9721, + 11, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 368, + 287, + 6, + 335, + 781, + 374, + 1030, + 1956, + 669, + 18832, + 6212, + 1531, + 730, + 1254, + 279, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + 
"confidence": 0.917, + "words": [ + { + "text": "d'induire", + "start": 76.86, + "end": 77.08, + "confidence": 0.841 + }, + { + "text": "en", + "start": 77.08, + "end": 77.24, + "confidence": 0.618 + }, + { + "text": "fait", + "start": 77.24, + "end": 77.34, + "confidence": 0.994 + }, + { + "text": "une", + "start": 77.34, + "end": 77.52, + "confidence": 0.983 + }, + { + "text": "espèce", + "start": 77.52, + "end": 77.88, + "confidence": 0.996 + }, + { + "text": "de", + "start": 77.88, + "end": 78.48, + "confidence": 0.997 + }, + { + "text": "relation,", + "start": 78.48, + "end": 78.6, + "confidence": 0.596 + }, + { + "text": "de", + "start": 78.6, + "end": 78.94, + "confidence": 0.987 + }, + { + "text": "médiation", + "start": 78.94, + "end": 79.52, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 79.52, + "end": 79.74, + "confidence": 0.967 + }, + { + "text": "le", + "start": 79.74, + "end": 79.92, + "confidence": 0.998 + }, + { + "text": "monde", + "start": 79.92, + "end": 80.64, + "confidence": 0.992 + }, + { + "text": "qui", + "start": 80.64, + "end": 81.1, + "confidence": 0.696 + }, + { + "text": "rend", + "start": 81.1, + "end": 81.64, + "confidence": 0.91 + }, + { + "text": "de", + "start": 81.64, + "end": 81.78, + "confidence": 0.712 + }, + { + "text": "l'ampleur", + "start": 81.78, + "end": 82.02, + "confidence": 0.987 + }, + { + "text": "et", + "start": 82.02, + "end": 82.12, + "confidence": 0.931 + }, + { + "text": "qui", + "start": 82.12, + "end": 82.24, + "confidence": 0.976 + }, + { + "text": "amène", + "start": 82.24, + "end": 82.36, + "confidence": 0.973 + }, + { + "text": "aussi", + "start": 82.36, + "end": 82.56, + "confidence": 0.938 + }, + { + "text": "à", + "start": 82.56, + "end": 82.62, + "confidence": 0.941 + }, + { + "text": "des", + "start": 82.62, + "end": 82.72, + "confidence": 0.992 + }, + { + "text": "formes", + "start": 82.72, + "end": 82.9, + "confidence": 0.993 + }, + { + "text": "de", + "start": 82.9, + "end": 83.02, + 
"confidence": 0.998 + }, + { + "text": "rejet.", + "start": 83.02, + "end": 83.26, + "confidence": 0.883 + } + ] + }, + { + "id": 17, + "seek": 7684, + "start": 83.94, + "end": 87.8, + "text": " Donc, à objet inédit, rapport inédit.", + "tokens": [ + 7477, + 11, + 1531, + 14964, + 294, + 7811, + 270, + 11, + 18018, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.962, + "words": [ + { + "text": "Donc,", + "start": 83.94, + "end": 84.94, + "confidence": 0.971 + }, + { + "text": "à", + "start": 84.94, + "end": 84.98, + "confidence": 0.88 + }, + { + "text": "objet", + "start": 84.98, + "end": 85.36, + "confidence": 0.828 + }, + { + "text": "inédit,", + "start": 85.36, + "end": 86.56, + "confidence": 0.993 + }, + { + "text": "rapport", + "start": 86.56, + "end": 87.0, + "confidence": 0.981 + }, + { + "text": "inédit.", + "start": 87.0, + "end": 87.8, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 7684, + "start": 88.02, + "end": 95.14, + "text": " Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet.", + "tokens": [ + 3790, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 21724, + 38268, + 11, + 23139, + 1032, + 578, + 4198, + 22118, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 1030, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.972, + "words": [ + { + "text": "Et", + "start": 88.02, + "end": 88.48, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 88.48, + "end": 88.86, + "confidence": 0.975 + }, + { + "text": "rapport,", + "start": 88.86, + "end": 89.28, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.28, + "end": 89.56, + "confidence": 0.999 + }, + { + "text": "j'en", + "start": 
89.56, + "end": 89.84, + "confidence": 0.996 + }, + { + "text": "crois", + "start": 89.84, + "end": 89.88, + "confidence": 0.984 + }, + { + "text": "Nicolas,", + "start": 89.88, + "end": 90.54, + "confidence": 0.681 + }, + { + "text": "serait", + "start": 90.54, + "end": 90.94, + "confidence": 0.885 + }, + { + "text": "caractérisé", + "start": 90.94, + "end": 91.8, + "confidence": 0.993 + }, + { + "text": "par", + "start": 91.8, + "end": 92.12, + "confidence": 0.997 + }, + { + "text": "un", + "start": 92.12, + "end": 92.44, + "confidence": 0.997 + }, + { + "text": "mélange", + "start": 92.44, + "end": 92.98, + "confidence": 0.999 + }, + { + "text": "de", + "start": 92.98, + "end": 93.4, + "confidence": 0.998 + }, + { + "text": "dépendance", + "start": 93.4, + "end": 94.24, + "confidence": 0.953 + }, + { + "text": "et", + "start": 94.24, + "end": 94.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 94.54, + "end": 94.68, + "confidence": 0.999 + }, + { + "text": "rejet.", + "start": 94.68, + "end": 95.14, + "confidence": 0.993 + } + ] + }, + { + "id": 19, + "seek": 7684, + "start": 95.78, + "end": 102.86, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies", + "tokens": [ + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 1111, + 25349, + 7512, + 1030, + 368, + 9580, + 8969, + 313, + 2680, + 3269, + 371, + 530 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.941, + "words": [ + { + "text": "Bon,", + "start": 95.78, + "end": 96.38, + "confidence": 0.792 + }, + { + "text": "en", + "start": 96.38, + "end": 96.52, + "confidence": 0.998 + }, + { + "text": "vrai,", + "start": 96.52, + "end": 97.14, + "confidence": 0.994 + }, + { + "text": "il", + "start": 97.14, 
+ "end": 97.18, + "confidence": 0.998 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.58, + "confidence": 0.997 + }, + { + "text": "remonter", + "start": 97.58, + "end": 98.08, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.08, + "end": 98.6, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.6, + "end": 98.7, + "confidence": 0.768 + }, + { + "text": "finement", + "start": 98.7, + "end": 99.32, + "confidence": 0.849 + }, + { + "text": "toute", + "start": 99.32, + "end": 99.7, + "confidence": 0.984 + }, + { + "text": "l'histoire", + "start": 99.7, + "end": 100.06, + "confidence": 0.997 + }, + { + "text": "des", + "start": 100.06, + "end": 100.24, + "confidence": 0.998 + }, + { + "text": "objets", + "start": 100.24, + "end": 100.48, + "confidence": 0.999 + }, + { + "text": "techniques", + "start": 100.48, + "end": 101.02, + "confidence": 0.984 + }, + { + "text": "et", + "start": 101.02, + "end": 101.48, + "confidence": 0.527 + }, + { + "text": "de", + "start": 101.48, + "end": 101.68, + "confidence": 0.994 + }, + { + "text": "leur", + "start": 101.68, + "end": 101.84, + "confidence": 0.833 + }, + { + "text": "insertion", + "start": 101.84, + "end": 102.32, + "confidence": 0.994 + }, + { + "text": "dans", + "start": 102.32, + "end": 102.48, + "confidence": 0.99 + }, + { + "text": "nos", + "start": 102.48, + "end": 102.66, + "confidence": 0.998 + }, + { + "text": "vies", + "start": 102.66, + "end": 102.86, + "confidence": 0.998 + } + ] + }, + { + "id": 20, + "seek": 7684, + "start": 102.9, + "end": 105.74, + "text": " pour déterminer si ce rapport est totalement inédit.", + "tokens": [ + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.981, + "words": [ + { + "text": "pour", + "start": 102.9, + "end": 103.06, + 
"confidence": 0.822 + }, + { + "text": "déterminer", + "start": 103.06, + "end": 103.66, + "confidence": 0.997 + }, + { + "text": "si", + "start": 103.66, + "end": 103.76, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 103.76, + "end": 103.94, + "confidence": 0.997 + }, + { + "text": "rapport", + "start": 103.94, + "end": 104.26, + "confidence": 0.997 + }, + { + "text": "est", + "start": 104.26, + "end": 104.74, + "confidence": 0.998 + }, + { + "text": "totalement", + "start": 104.74, + "end": 105.3, + "confidence": 0.999 + }, + { + "text": "inédit.", + "start": 105.3, + "end": 105.74, + "confidence": 0.999 + } + ] + }, + { + "id": 21, + "seek": 10584, + "start": 106.1, + "end": 109.34, + "text": " Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment.", + "tokens": [ + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 408, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.94, + "words": [ + { + "text": "Mais", + "start": 106.1, + "end": 106.36, + "confidence": 0.947 + }, + { + "text": "j'ai", + "start": 106.36, + "end": 106.92, + "confidence": 0.941 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.36, + "confidence": 0.996 + }, + { + "text": "comme", + "start": 107.36, + "end": 107.56, + "confidence": 0.641 + }, + { + "text": "ça", + "start": 107.56, + "end": 107.82, + "confidence": 0.978 + }, + { + "text": "que", + "start": 107.82, + "end": 107.96, + "confidence": 0.976 + }, + { + "text": "Nicolas", + "start": 107.96, + "end": 108.46, + "confidence": 0.985 + }, + { + "text": "ne", + "start": 108.46, + "end": 108.66, + "confidence": 0.726 + }, + { + "text": "se", + "start": 108.66, + "end": 108.7, + "confidence": 0.991 + }, + { + "text": "trompe", + "start": 108.7, + "end": 108.88, + "confidence": 0.995 + }, + { + "text": 
"pas", + "start": 108.88, + "end": 109.08, + "confidence": 0.999 + }, + { + "text": "vraiment.", + "start": 109.08, + "end": 109.34, + "confidence": 0.991 + } + ] + }, + { + "id": 22, + "seek": 10584, + "start": 109.88, + "end": 114.98, + "text": " Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 8732, + 34081, + 631, + 1506, + 262, + 6000, + 11, + 1930, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.969, + "words": [ + { + "text": "Pour", + "start": 109.88, + "end": 110.08, + "confidence": 0.997 + }, + { + "text": "autant", + "start": 110.08, + "end": 110.24, + "confidence": 1.0 + }, + { + "text": "que", + "start": 110.24, + "end": 110.42, + "confidence": 0.988 + }, + { + "text": "je", + "start": 110.42, + "end": 110.52, + "confidence": 0.998 + }, + { + "text": "sache,", + "start": 110.52, + "end": 111.14, + "confidence": 0.963 + }, + { + "text": "il", + "start": 111.14, + "end": 111.18, + "confidence": 0.997 + }, + { + "text": "y", + "start": 111.18, + "end": 111.32, + "confidence": 0.992 + }, + { + "text": "a", + "start": 111.32, + "end": 111.36, + "confidence": 0.993 + }, + { + "text": "eu", + "start": 111.36, + "end": 111.68, + "confidence": 0.998 + }, + { + "text": "plein", + "start": 111.68, + "end": 111.88, + "confidence": 0.974 + }, + { + "text": "de", + "start": 111.88, + "end": 112.06, + "confidence": 0.997 + }, + { + "text": "discussions", + "start": 112.06, + "end": 112.6, + "confidence": 0.799 + }, + { + "text": "autour", + "start": 112.6, + "end": 112.94, + "confidence": 0.995 + }, + { + "text": "de", + "start": 112.94, + "end": 113.46, + "confidence": 0.997 + }, + { + "text": "la", + "start": 113.46, + "end": 113.52, + "confidence": 0.998 + }, + { 
+ "text": "voiture", + "start": 113.52, + "end": 113.86, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 113.86, + "end": 114.06, + "confidence": 0.765 + }, + { + "text": "même", + "start": 114.06, + "end": 114.44, + "confidence": 0.996 + }, + { + "text": "du", + "start": 114.44, + "end": 114.6, + "confidence": 0.995 + }, + { + "text": "téléphone.", + "start": 114.6, + "end": 114.98, + "confidence": 0.999 + } + ] + }, + { + "id": 23, + "seek": 10584, + "start": 115.34, + "end": 119.84, + "text": " Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre.", + "tokens": [ + 6313, + 635, + 45768, + 719, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 7477, + 476, + 319, + 7108, + 2107, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.972, + "words": [ + { + "text": "Mais", + "start": 115.34, + "end": 115.72, + "confidence": 0.994 + }, + { + "text": "la", + "start": 115.72, + "end": 116.02, + "confidence": 0.937 + }, + { + "text": "dépendance", + "start": 116.02, + "end": 116.4, + "confidence": 0.997 + }, + { + "text": "n'était", + "start": 116.4, + "end": 116.62, + "confidence": 0.994 + }, + { + "text": "pas", + "start": 116.62, + "end": 117.0, + "confidence": 0.998 + }, + { + "text": "du", + "start": 117.0, + "end": 117.16, + "confidence": 0.995 + }, + { + "text": "même", + "start": 117.16, + "end": 117.46, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 117.46, + "end": 117.78, + "confidence": 0.999 + }, + { + "text": "Donc", + "start": 117.78, + "end": 117.98, + "confidence": 0.804 + }, + { + "text": "le", + "start": 117.98, + "end": 118.34, + "confidence": 0.715 + }, + { + "text": "rejet", + "start": 118.34, + "end": 118.62, + "confidence": 0.999 + }, + { + "text": "non", + "start": 118.62, + "end": 
118.78, + "confidence": 0.975 + }, + { + "text": "plus", + "start": 118.78, + "end": 118.94, + "confidence": 0.996 + }, + { + "text": "n'était", + "start": 118.94, + "end": 119.12, + "confidence": 0.987 + }, + { + "text": "pas", + "start": 119.12, + "end": 119.3, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.3, + "end": 119.38, + "confidence": 0.995 + }, + { + "text": "même", + "start": 119.38, + "end": 119.56, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 119.56, + "end": 119.84, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 10584, + "start": 119.98, + "end": 123.02, + "text": " On peut adorer sa bagnole, en avoir besoin pour plein de choses.", + "tokens": [ + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 1771, + 306, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.991, + "words": [ + { + "text": "On", + "start": 119.98, + "end": 120.18, + "confidence": 0.996 + }, + { + "text": "peut", + "start": 120.18, + "end": 120.38, + "confidence": 0.997 + }, + { + "text": "adorer", + "start": 120.38, + "end": 120.66, + "confidence": 0.99 + }, + { + "text": "sa", + "start": 120.66, + "end": 120.88, + "confidence": 0.985 + }, + { + "text": "bagnole,", + "start": 120.88, + "end": 121.46, + "confidence": 0.984 + }, + { + "text": "en", + "start": 121.46, + "end": 121.56, + "confidence": 0.989 + }, + { + "text": "avoir", + "start": 121.56, + "end": 121.74, + "confidence": 0.998 + }, + { + "text": "besoin", + "start": 121.74, + "end": 122.1, + "confidence": 0.999 + }, + { + "text": "pour", + "start": 122.1, + "end": 122.34, + "confidence": 0.987 + }, + { + "text": "plein", + "start": 122.34, + "end": 122.68, + "confidence": 0.989 + }, + { + "text": "de", + "start": 122.68, + "end": 122.8, + "confidence": 0.998 + }, + { + "text": "choses.", + 
"start": 122.8, + "end": 123.02, + "confidence": 0.989 + } + ] + }, + { + "id": 25, + "seek": 10584, + "start": 123.28, + "end": 126.36, + "text": " Et bien, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 3790, + 3610, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.895, + "words": [ + { + "text": "Et", + "start": 123.28, + "end": 123.46, + "confidence": 0.667 + }, + { + "text": "bien,", + "start": 123.46, + "end": 123.86, + "confidence": 0.469 + }, + { + "text": "le", + "start": 123.86, + "end": 123.98, + "confidence": 0.996 + }, + { + "text": "soir,", + "start": 123.98, + "end": 124.68, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 124.68, + "end": 124.9, + "confidence": 0.997 + }, + { + "text": "on", + "start": 124.9, + "end": 125.02, + "confidence": 0.998 + }, + { + "text": "va", + "start": 125.02, + "end": 125.14, + "confidence": 0.996 + }, + { + "text": "se", + "start": 125.14, + "end": 125.38, + "confidence": 0.988 + }, + { + "text": "coucher,", + "start": 125.38, + "end": 125.8, + "confidence": 0.987 + }, + { + "text": "on", + "start": 125.8, + "end": 126.02, + "confidence": 0.995 + }, + { + "text": "la", + "start": 126.02, + "end": 126.22, + "confidence": 0.802 + }, + { + "text": "laisse.", + "start": 126.22, + "end": 126.36, + "confidence": 0.999 + } + ] + }, + { + "id": 26, + "seek": 10584, + "start": 126.98, + "end": 130.48, + "text": " On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes.", + "tokens": [ + 1282, + 408, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 6932, + 322, + 871, + 1609, + 7997, + 11, + 322, + 408, + 287, + 6, + 443, + 76, + 18832, + 1736, + 1609, + 13228, + 1521, + 279, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + 
"compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.914, + "words": [ + { + "text": "On", + "start": 126.98, + "end": 127.32, + "confidence": 0.954 + }, + { + "text": "ne", + "start": 127.32, + "end": 127.36, + "confidence": 0.803 + }, + { + "text": "l'a", + "start": 127.36, + "end": 127.48, + "confidence": 0.974 + }, + { + "text": "pas", + "start": 127.48, + "end": 127.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 127.68, + "end": 127.8, + "confidence": 0.996 + }, + { + "text": "la", + "start": 127.8, + "end": 128.06, + "confidence": 0.994 + }, + { + "text": "main", + "start": 128.06, + "end": 128.26, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 128.26, + "end": 128.44, + "confidence": 0.935 + }, + { + "text": "on", + "start": 128.44, + "end": 128.62, + "confidence": 0.997 + }, + { + "text": "est", + "start": 128.62, + "end": 128.68, + "confidence": 0.993 + }, + { + "text": "au", + "start": 128.68, + "end": 129.04, + "confidence": 0.984 + }, + { + "text": "lit,", + "start": 129.04, + "end": 129.14, + "confidence": 0.999 + }, + { + "text": "on", + "start": 129.14, + "end": 129.26, + "confidence": 0.427 + }, + { + "text": "ne", + "start": 129.26, + "end": 129.3, + "confidence": 0.963 + }, + { + "text": "l'emmène", + "start": 129.3, + "end": 129.5, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 129.5, + "end": 129.68, + "confidence": 0.997 + }, + { + "text": "au", + "start": 129.68, + "end": 129.86, + "confidence": 0.674 + }, + { + "text": "chiottes.", + "start": 129.86, + "end": 130.48, + "confidence": 0.828 + } + ] + }, + { + "id": 27, + "seek": 13084, + "start": 130.86, + "end": 136.9, + "text": " On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain.", + "tokens": [ + 1282, + 45913, + 7418, + 45045, + 15797, + 971, + 1872, + 275, + 2851, + 1398, + 1956, + 8073, + 1001, + 635, + 34207, + 368, + 
47159, + 17338, + 2251, + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.955, + "words": [ + { + "text": "On", + "start": 130.86, + "end": 131.04, + "confidence": 0.983 + }, + { + "text": "pouvait", + "start": 131.04, + "end": 131.28, + "confidence": 0.989 + }, + { + "text": "être", + "start": 131.28, + "end": 131.48, + "confidence": 0.996 + }, + { + "text": "énervé", + "start": 131.48, + "end": 132.22, + "confidence": 0.906 + }, + { + "text": "par", + "start": 132.22, + "end": 132.44, + "confidence": 0.991 + }, + { + "text": "son", + "start": 132.44, + "end": 132.7, + "confidence": 0.998 + }, + { + "text": "môme", + "start": 132.7, + "end": 133.1, + "confidence": 0.832 + }, + { + "text": "qui", + "start": 133.1, + "end": 133.34, + "confidence": 0.919 + }, + { + "text": "occupait", + "start": 133.34, + "end": 133.76, + "confidence": 0.991 + }, + { + "text": "la", + "start": 133.76, + "end": 133.8, + "confidence": 0.992 + }, + { + "text": "ligne", + "start": 133.8, + "end": 134.08, + "confidence": 0.999 + }, + { + "text": "de", + "start": 134.08, + "end": 134.14, + "confidence": 0.997 + }, + { + "text": "téléphone", + "start": 134.14, + "end": 134.6, + "confidence": 0.985 + }, + { + "text": "pendant", + "start": 134.6, + "end": 134.82, + "confidence": 0.981 + }, + { + "text": "une", + "start": 134.82, + "end": 135.2, + "confidence": 0.87 + }, + { + "text": "heure", + "start": 135.2, + "end": 135.36, + "confidence": 0.998 + }, + { + "text": "chaque", + "start": 135.36, + "end": 135.54, + "confidence": 0.991 + }, + { + "text": "soir", + "start": 135.54, + "end": 135.8, + "confidence": 0.996 + }, + { + "text": "pour", + "start": 135.8, + "end": 135.96, + "confidence": 0.79 + }, + { + "text": "discuter", + "start": 135.96, + "end": 136.28, + "confidence": 0.997 + 
}, + { + "text": "avec", + "start": 136.28, + "end": 136.48, + "confidence": 0.996 + }, + { + "text": "un", + "start": 136.48, + "end": 136.6, + "confidence": 0.999 + }, + { + "text": "copain.", + "start": 136.6, + "end": 136.9, + "confidence": 0.998 + } + ] + }, + { + "id": 28, + "seek": 13084, + "start": 137.28, + "end": 141.88, + "text": " Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui,", + "tokens": [ + 6313, + 2788, + 408, + 725, + 15750, + 35235, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 1769, + 5698, + 275, + 2851, + 1398, + 14023, + 6, + 10556, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.959, + "words": [ + { + "text": "Mais", + "start": 137.28, + "end": 137.46, + "confidence": 0.993 + }, + { + "text": "ça", + "start": 137.46, + "end": 137.68, + "confidence": 0.938 + }, + { + "text": "ne", + "start": 137.68, + "end": 137.94, + "confidence": 0.998 + }, + { + "text": "ressemblait", + "start": 137.94, + "end": 138.4, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 138.4, + "end": 138.76, + "confidence": 0.995 + }, + { + "text": "à", + "start": 138.76, + "end": 138.94, + "confidence": 0.988 + }, + { + "text": "ce", + "start": 138.94, + "end": 138.98, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 138.98, + "end": 139.1, + "confidence": 0.986 + }, + { + "text": "peut", + "start": 139.1, + "end": 139.48, + "confidence": 0.988 + }, + { + "text": "ressentir", + "start": 139.48, + "end": 140.12, + "confidence": 0.997 + }, + { + "text": "à", + "start": 140.12, + "end": 140.32, + "confidence": 0.498 + }, + { + "text": "voir", + "start": 140.32, + "end": 140.46, + "confidence": 0.855 + }, + { + "text": "ce", + "start": 140.46, + "end": 140.68, + "confidence": 0.989 + }, + { + "text": "même", + "start": 140.68, + "end": 140.94, 
+ "confidence": 0.984 + }, + { + "text": "môme", + "start": 140.94, + "end": 141.34, + "confidence": 0.998 + }, + { + "text": "aujourd'hui,", + "start": 141.34, + "end": 141.88, + "confidence": 0.988 + } + ] + }, + { + "id": 29, + "seek": 13084, + "start": 142.14, + "end": 146.3, + "text": " continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe,", + "tokens": [ + 2354, + 285, + 1712, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 1511, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 15165, + 49523, + 454, + 391, + 716, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.963, + "words": [ + { + "text": "continuellement", + "start": 142.14, + "end": 142.94, + "confidence": 0.971 + }, + { + "text": "avec", + "start": 142.94, + "end": 143.18, + "confidence": 0.805 + }, + { + "text": "son", + "start": 143.18, + "end": 143.38, + "confidence": 0.995 + }, + { + "text": "smartphone", + "start": 143.38, + "end": 143.76, + "confidence": 0.978 + }, + { + "text": "dans", + "start": 143.76, + "end": 143.94, + "confidence": 0.979 + }, + { + "text": "la", + "start": 143.94, + "end": 144.0, + "confidence": 0.996 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.36, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 144.36, + "end": 144.52, + "confidence": 0.835 + }, + { + "text": "si", + "start": 144.52, + "end": 144.64, + "confidence": 0.975 + }, + { + "text": "c'était", + "start": 144.64, + "end": 144.82, + "confidence": 0.991 + }, + { + "text": "une", + "start": 144.82, + "end": 145.06, + "confidence": 0.989 + }, + { + "text": "sorte", + "start": 145.06, + "end": 145.22, + "confidence": 0.997 + }, + { + "text": "de", + "start": 145.22, + "end": 145.3, + "confidence": 0.982 + }, + { + "text": "pacemaker", + "start": 145.3, + "end": 145.82, + "confidence": 0.917 + }, + { + "text": 
"externe,", + "start": 145.82, + "end": 146.3, + "confidence": 0.992 + } + ] + }, + { + "id": 30, + "seek": 13084, + "start": 146.34, + "end": 148.84, + "text": " comme si le lâcher allait entraîner sa mort immédiate.", + "tokens": [ + 5173, + 1511, + 476, + 48835, + 6759, + 439, + 1001, + 22284, + 7517, + 1193, + 601, + 6599, + 3397, + 526, + 4504, + 473, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.989, + "words": [ + { + "text": "comme", + "start": 146.34, + "end": 146.58, + "confidence": 0.996 + }, + { + "text": "si", + "start": 146.58, + "end": 146.76, + "confidence": 0.994 + }, + { + "text": "le", + "start": 146.76, + "end": 146.84, + "confidence": 0.997 + }, + { + "text": "lâcher", + "start": 146.84, + "end": 147.36, + "confidence": 0.969 + }, + { + "text": "allait", + "start": 147.36, + "end": 147.56, + "confidence": 0.993 + }, + { + "text": "entraîner", + "start": 147.56, + "end": 147.86, + "confidence": 0.978 + }, + { + "text": "sa", + "start": 147.86, + "end": 148.0, + "confidence": 0.999 + }, + { + "text": "mort", + "start": 148.0, + "end": 148.22, + "confidence": 0.998 + }, + { + "text": "immédiate.", + "start": 148.22, + "end": 148.84, + "confidence": 0.997 + } + ] + }, + { + "id": 31, + "seek": 13084, + "start": 149.04, + "end": 151.96, + "text": " Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi.", + "tokens": [ + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 2851, + 1398, + 11, + 2420, + 269, + 6, + 377, + 24724, + 1323, + 712, + 2016, + 4666, + 6212, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.974, + "words": [ + { + "text": "Bon,", + "start": 149.04, + "end": 149.28, + "confidence": 0.918 + }, + { + "text": "je", + "start": 149.28, + "end": 149.32, + 
"confidence": 0.934 + }, + { + "text": "dis", + "start": 149.32, + "end": 149.46, + "confidence": 0.988 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.994 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.74, + "confidence": 0.997 + }, + { + "text": "le", + "start": 149.74, + "end": 149.88, + "confidence": 0.995 + }, + { + "text": "môme,", + "start": 149.88, + "end": 150.32, + "confidence": 0.998 + }, + { + "text": "mais", + "start": 150.32, + "end": 150.52, + "confidence": 0.791 + }, + { + "text": "c'est", + "start": 150.52, + "end": 150.82, + "confidence": 0.981 + }, + { + "text": "évidemment", + "start": 150.82, + "end": 151.14, + "confidence": 0.98 + }, + { + "text": "valable", + "start": 151.14, + "end": 151.48, + "confidence": 0.998 + }, + { + "text": "pour", + "start": 151.48, + "end": 151.62, + "confidence": 0.997 + }, + { + "text": "nous", + "start": 151.62, + "end": 151.76, + "confidence": 0.999 + }, + { + "text": "aussi.", + "start": 151.76, + "end": 151.96, + "confidence": 0.996 + } + ] + }, + { + "id": 32, + "seek": 13084, + "start": 152.34, + "end": 158.22, + "text": " Donc, rapport inédit. D'accord. 
Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais?", + "tokens": [ + 7477, + 11, + 18018, + 294, + 7811, + 270, + 13, + 413, + 6, + 19947, + 13, + 6313, + 19934, + 257, + 12, + 83, + 12, + 266, + 287, + 6, + 36107, + 421, + 6, + 266, + 297, + 6, + 268, + 1333, + 4271, + 14540, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.948, + "words": [ + { + "text": "Donc,", + "start": 152.34, + "end": 153.46, + "confidence": 0.991 + }, + { + "text": "rapport", + "start": 153.46, + "end": 153.66, + "confidence": 0.976 + }, + { + "text": "inédit.", + "start": 153.66, + "end": 154.24, + "confidence": 0.996 + }, + { + "text": "D'accord.", + "start": 154.24, + "end": 155.48, + "confidence": 0.978 + }, + { + "text": "Mais", + "start": 155.48, + "end": 155.82, + "confidence": 0.557 + }, + { + "text": "pourquoi", + "start": 155.82, + "end": 156.32, + "confidence": 0.994 + }, + { + "text": "a-t-on", + "start": 156.32, + "end": 156.68, + "confidence": 0.94 + }, + { + "text": "l'impression", + "start": 156.68, + "end": 157.06, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.26, + "confidence": 0.995 + }, + { + "text": "n'en", + "start": 157.26, + "end": 157.44, + "confidence": 0.878 + }, + { + "text": "sortira", + "start": 157.44, + "end": 157.9, + "confidence": 0.989 + }, + { + "text": "jamais?", + "start": 157.9, + "end": 158.22, + "confidence": 0.998 + } + ] + }, + { + "id": 33, + "seek": 15884, + "start": 158.86, + "end": 165.32, + "text": " Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux?", + "tokens": [ + 4410, + 12, + 384, + 421, + 6, + 388, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 8603, + 484, + 388, + 3551, + 303, + 3409, + 2449, + 1030, + 1026, + 
14923, + 1925, + 11, + 1030, + 1026, + 14923, + 1925, + 6992, + 631, + 3551, + 303, + 3409, + 2449, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.079788723507443, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.4980162531137466e-05, + "confidence": 0.962, + "words": [ + { + "text": "Est-ce", + "start": 158.86, + "end": 159.34, + "confidence": 0.981 + }, + { + "text": "qu'il", + "start": 159.34, + "end": 159.46, + "confidence": 0.997 + }, + { + "text": "faut", + "start": 159.46, + "end": 159.62, + "confidence": 0.999 + }, + { + "text": "en", + "start": 159.62, + "end": 159.78, + "confidence": 0.97 + }, + { + "text": "remettre", + "start": 159.78, + "end": 160.12, + "confidence": 0.999 + }, + { + "text": "la", + "start": 160.12, + "end": 160.34, + "confidence": 0.998 + }, + { + "text": "faute", + "start": 160.34, + "end": 160.66, + "confidence": 0.986 + }, + { + "text": "sur", + "start": 160.66, + "end": 160.94, + "confidence": 0.995 + }, + { + "text": "les", + "start": 160.94, + "end": 161.28, + "confidence": 0.995 + }, + { + "text": "gens", + "start": 161.28, + "end": 161.44, + "confidence": 1.0 + }, + { + "text": "qui", + "start": 161.44, + "end": 161.58, + "confidence": 0.984 + }, + { + "text": "ont", + "start": 161.58, + "end": 161.72, + "confidence": 0.998 + }, + { + "text": "créé", + "start": 161.72, + "end": 162.3, + "confidence": 0.99 + }, + { + "text": "cet", + "start": 162.3, + "end": 162.46, + "confidence": 0.852 + }, + { + "text": "outil", + "start": 162.46, + "end": 162.78, + "confidence": 0.99 + }, + { + "text": "merveilleux", + "start": 162.78, + "end": 163.34, + "confidence": 0.994 + }, + { + "text": "et", + "start": 163.34, + "end": 163.5, + "confidence": 0.954 + }, + { + "text": "diabolique,", + "start": 163.5, + "end": 163.86, + "confidence": 0.992 + }, + { + "text": "et", + "start": 163.86, + "end": 163.92, + "confidence": 0.54 + }, + { + "text": "diabolique", + "start": 163.92, + "end": 164.4, + "confidence": 0.951 + }, 
+ { + "text": "parce", + "start": 164.4, + "end": 164.66, + "confidence": 0.703 + }, + { + "text": "que", + "start": 164.66, + "end": 164.84, + "confidence": 0.99 + }, + { + "text": "merveilleux?", + "start": 164.84, + "end": 165.32, + "confidence": 0.997 + } + ] + }, + { + "id": 34, + "seek": 15884, + "start": 166.34, + "end": 168.82, + "text": " Les économistes parlent de dépendance du sentier.", + "tokens": [ + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 2279, + 811, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.079788723507443, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.4980162531137466e-05, + "confidence": 0.984, + "words": [ + { + "text": "Les", + "start": 166.34, + "end": 167.04, + "confidence": 0.926 + }, + { + "text": "économistes", + "start": 167.04, + "end": 167.48, + "confidence": 0.998 + }, + { + "text": "parlent", + "start": 167.48, + "end": 167.68, + "confidence": 0.995 + }, + { + "text": "de", + "start": 167.68, + "end": 167.82, + "confidence": 0.995 + }, + { + "text": "dépendance", + "start": 167.82, + "end": 168.36, + "confidence": 0.985 + }, + { + "text": "du", + "start": 168.36, + "end": 168.5, + "confidence": 0.997 + }, + { + "text": "sentier.", + "start": 168.5, + "end": 168.82, + "confidence": 0.978 + } + ] + }, + { + "id": 35, + "seek": 16884, + "start": 168.86, + "end": 177.42, + "text": " C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "tokens": [ + 50364, + 383, + 6, + 377, + 287, + 6, + 34281, + 421, + 6, + 266, + 871, + 1022, + 517, + 2279, + 811, + 1956, + 257, + 8862, + 4823, + 455, + 2081, + 11, + 12703, + 40005, + 9020, + 518, + 465, + 8368, + 394, + 30677, + 11, + 12703, + 465, + 40763, + 29492, + 730, + 4232, + 279, + 11, + 465, + 40763, + 29492, + 2251, + 6358, + 42379, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.10415018598238628, + 
"compression_ratio": 1.3916666666666666, + "no_speech_prob": 3.958350498578511e-05, + "confidence": 0.93, + "words": [ + { + "text": "C'est", + "start": 168.86, + "end": 169.1, + "confidence": 0.952 + }, + { + "text": "l'idée", + "start": 169.1, + "end": 169.34, + "confidence": 0.992 + }, + { + "text": "qu'on", + "start": 169.34, + "end": 169.7, + "confidence": 0.825 + }, + { + "text": "est", + "start": 169.7, + "end": 169.88, + "confidence": 0.631 + }, + { + "text": "sur", + "start": 169.88, + "end": 170.0, + "confidence": 0.993 + }, + { + "text": "un", + "start": 170.0, + "end": 170.54, + "confidence": 0.998 + }, + { + "text": "sentier", + "start": 170.54, + "end": 170.78, + "confidence": 0.99 + }, + { + "text": "qui", + "start": 170.78, + "end": 170.84, + "confidence": 0.961 + }, + { + "text": "a", + "start": 170.84, + "end": 170.94, + "confidence": 0.98 + }, + { + "text": "été", + "start": 170.94, + "end": 171.12, + "confidence": 0.996 + }, + { + "text": "établi,", + "start": 171.12, + "end": 171.9, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 171.9, + "end": 172.12, + "confidence": 0.989 + }, + { + "text": "volontairement", + "start": 172.12, + "end": 172.72, + "confidence": 0.965 + }, + { + "text": "en", + "start": 172.72, + "end": 172.8, + "confidence": 0.941 + }, + { + "text": "marchant", + "start": 172.8, + "end": 173.06, + "confidence": 0.997 + }, + { + "text": "dessus,", + "start": 173.06, + "end": 174.24, + "confidence": 0.972 + }, + { + "text": "soit", + "start": 174.24, + "end": 174.92, + "confidence": 0.996 + }, + { + "text": "en", + "start": 174.92, + "end": 175.36, + "confidence": 0.991 + }, + { + "text": "définissant", + "start": 175.36, + "end": 175.5, + "confidence": 0.983 + }, + { + "text": "des", + "start": 175.5, + "end": 175.76, + "confidence": 0.984 + }, + { + "text": "bornes,", + "start": 175.76, + "end": 176.04, + "confidence": 0.975 + }, + { + "text": "en", + "start": 176.04, + "end": 176.08, + "confidence": 0.68 + }, + { 
+ "text": "définissant", + "start": 176.08, + "end": 176.58, + "confidence": 0.997 + }, + { + "text": "une", + "start": 176.58, + "end": 176.82, + "confidence": 0.944 + }, + { + "text": "signalétique.", + "start": 176.82, + "end": 177.42, + "confidence": 0.642 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/bonjour.wav.words.json b/tests/expected/medium_auto/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..289387fb36ad5876330338048569d2468697e8b0 --- /dev/null +++ b/tests/expected/medium_auto/bonjour.wav.words.json @@ -0,0 +1,133 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.94, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50402 + ], + "temperature": 0.0, + "avg_logprob": -0.7047327041625977, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.08847080171108246, + "confidence": 0.964, + "words": [ + { + "text": "Bonjour !", + "start": 0.14, + "end": 0.94, + "confidence": 0.964 + } + ] + } + ], + "language": "fr", + "language_probs": { + "en": 0.011238126084208488, + "zh": 0.010232431814074516, + "de": 0.0010617697844281793, + "es": 0.0004289938078727573, + "ru": 0.0012807389721274376, + "ko": 0.002547053387388587, + "fr": 0.950332760810852, + "ja": 0.0031207147985696793, + "pt": 0.007969025522470474, + "tr": 0.00019950064597651362, + "pl": 0.0014287674566730857, + "ca": 8.62955130287446e-06, + "nl": 0.000533891434315592, + "ar": 0.0016445013461634517, + "sv": 0.00013711463543586433, + "it": 0.002547053387388587, + "id": 8.187490311684087e-05, + "hi": 0.00011017472570529208, + "fi": 1.2169617548352107e-05, + "vi": 0.00039675438893027604, + "he": 7.572189497295767e-05, + "uk": 3.206266774213873e-05, + "el": 0.000256163883022964, + "ms": 0.0003188011178281158, + "cs": 2.6999477995559573e-05, + "ro": 5.123599839862436e-05, + "da": 5.316512215358671e-06, + "hu": 
1.612212508916855e-05, + "ta": 2.616879100969527e-05, + "no": 6.616506198042771e-06, + "th": 0.00017883122200146317, + "ur": 2.458330345689319e-05, + "hr": 3.7115235045348527e-06, + "bg": 6.513926564366557e-06, + "lt": 2.4865656200745434e-07, + "la": 0.000624182284809649, + "mi": 9.722884715301916e-05, + "ml": 5.073050942883128e-06, + "cy": 0.0012220896314829588, + "sk": 6.75918613524118e-07, + "te": 6.826536719017895e-06, + "fa": 2.5763081794138998e-05, + "lv": 8.728667211244101e-08, + "bn": 3.0292571864265483e-06, + "sr": 7.9475412917418e-08, + "az": 6.692501131055906e-08, + "sl": 8.62955130287446e-06, + "kn": 2.7681386072231362e-08, + "et": 7.824326075933641e-08, + "mk": 5.591794050019416e-08, + "br": 0.00033410071046091616, + "eu": 1.1678827149808058e-06, + "is": 2.817647839492565e-07, + "hy": 1.6054474372140248e-07, + "ne": 1.1034068592152835e-07, + "mn": 5.023000539949862e-07, + "bs": 1.2049553106407984e-06, + "kk": 6.093591764511075e-08, + "sq": 8.199824463872574e-08, + "sw": 9.626958672015462e-06, + "gl": 1.3789981494483072e-05, + "mr": 1.2900133583571005e-07, + "pa": 2.8620195280382177e-07, + "si": 7.85729571362026e-06, + "km": 0.00011912727495655417, + "sn": 8.316423918586224e-05, + "yo": 1.0409225069452077e-05, + "so": 8.44234637753516e-09, + "af": 6.551227897944045e-07, + "oc": 2.4970433514681645e-05, + "ka": 9.717080473592432e-09, + "be": 6.720701094309334e-06, + "tg": 5.580050199682773e-09, + "sd": 3.398732246751024e-07, + "gu": 2.620821071275259e-08, + "am": 2.2592889337147426e-08, + "yi": 6.551227897944045e-07, + "lo": 1.4534535921484348e-06, + "uz": 8.359054559647916e-10, + "fo": 2.061435395717126e-07, + "ht": 3.653982048490434e-06, + "ps": 2.6733102913567564e-06, + "tk": 4.772870099145621e-09, + "nn": 0.0005955988890491426, + "mt": 2.6620929460818843e-08, + "sa": 2.006430622714106e-05, + "lb": 5.283085080520777e-09, + "my": 3.7115235045348527e-06, + "bo": 1.8088522892867331e-06, + "tl": 2.829520781233441e-05, + "mg": 1.2772860991105972e-08, + 
"as": 2.1558285823175538e-08, + "tt": 1.636623392364811e-09, + "haw": 0.0001392738922731951, + "ln": 5.431159024738008e-07, + "ha": 1.034377827835442e-08, + "ba": 6.1284861629928855e-09, + "jw": 0.00025219243252649903, + "su": 7.053843731341658e-09 + } +} \ No newline at end of file diff --git a/tests/expected/medium_auto/bonjour_vous_allez_bien.mp3.words.json b/tests/expected/medium_auto/bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..2e96c407e464bc3dd04a305c85437b74e2da89f0 --- /dev/null +++ b/tests/expected/medium_auto/bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,235 @@ +{ + "text": " Bonjour ! Est-ce que vous allez bien ? Bonjour ! Est-ce que vous allez bien ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 3.46, + "text": " Bonjour ! Est-ce que vous allez bien ?", + "tokens": [ + 50364, + 25431, + 2298, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.36153463216928333, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.07168596982955933, + "confidence": 0.937, + "words": [ + { + "text": "Bonjour !", + "start": 0.42, + "end": 1.92, + "confidence": 0.874 + }, + { + "text": "Est-ce", + "start": 1.92, + "end": 2.16, + "confidence": 0.886 + }, + { + "text": "que", + "start": 2.16, + "end": 2.24, + "confidence": 0.987 + }, + { + "text": "vous", + "start": 2.24, + "end": 2.38, + "confidence": 0.996 + }, + { + "text": "allez", + "start": 2.38, + "end": 2.58, + "confidence": 0.99 + }, + { + "text": "bien ?", + "start": 2.58, + "end": 3.46, + "confidence": 0.999 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 32.94, + "end": 35.86, + "text": " Bonjour ! 
Est-ce que vous allez bien ?", + "tokens": [ + 50364, + 25431, + 2298, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.3062671698056735, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.40451109409332275, + "confidence": 0.933, + "words": [ + { + "text": "Bonjour !", + "start": 32.94, + "end": 34.44, + "confidence": 0.741 + }, + { + "text": "Est-ce", + "start": 34.44, + "end": 34.7, + "confidence": 0.921 + }, + { + "text": "que", + "start": 34.7, + "end": 34.76, + "confidence": 0.996 + }, + { + "text": "vous", + "start": 34.76, + "end": 34.9, + "confidence": 0.998 + }, + { + "text": "allez", + "start": 34.9, + "end": 35.1, + "confidence": 0.997 + }, + { + "text": "bien ?", + "start": 35.1, + "end": 35.86, + "confidence": 0.999 + } + ] + } + ], + "language": "fr", + "language_probs": { + "en": 0.06090075522661209, + "zh": 0.003714969614520669, + "de": 0.005753857549279928, + "es": 0.007160791661590338, + "ru": 0.0019884801004081964, + "ko": 0.002675792435184121, + "fr": 0.7536066174507141, + "ja": 0.002848366042599082, + "pt": 0.004921535030007362, + "tr": 0.000999870477244258, + "pl": 0.0036573747638612986, + "ca": 2.107844193233177e-05, + "nl": 0.002848366042599082, + "ar": 0.0026343080680817366, + "sv": 0.0002567879855632782, + "it": 0.0036573747638612986, + "id": 0.00012320821406319737, + "hi": 0.0004168080340605229, + "fi": 8.467969746561721e-05, + "vi": 0.0002488874306436628, + "he": 4.3250154703855515e-05, + "uk": 3.999985347036272e-05, + "el": 7.59063841542229e-05, + "ms": 0.0007201798143796623, + "cs": 0.00030021555721759796, + "ro": 9.746573778102174e-05, + "da": 4.257962427800521e-05, + "hu": 7.3570990934968e-05, + "ta": 7.5157495302846655e-06, + "no": 5.382568997447379e-05, + "th": 0.00013744870375376195, + "ur": 7.243038271553814e-05, + "hr": 2.8526567348308163e-06, + "bg": 1.182393498311285e-05, + "lt": 5.035238359596406e-07, + "la": 0.02311531826853752, + "mi": 
0.0002196424175053835, + "ml": 1.4946853298170026e-05, + "cy": 0.006319376640021801, + "sk": 8.254436579591129e-06, + "te": 1.9801364032900892e-05, + "fa": 0.00011574340896913782, + "lv": 8.70011263032211e-07, + "bn": 1.1640619959507603e-05, + "sr": 9.02768562127676e-08, + "az": 3.3350938792864326e-06, + "sl": 1.693700505711604e-05, + "kn": 3.4606654253366287e-07, + "et": 1.6253949297606596e-06, + "mk": 6.401587882010062e-08, + "br": 0.013588794507086277, + "eu": 2.0113191567361355e-05, + "is": 4.928948783344822e-06, + "hy": 1.0659599638529471e-06, + "ne": 2.054691776720574e-06, + "mn": 5.056455120211467e-05, + "bs": 3.440960881562205e-06, + "kk": 4.957174724040669e-07, + "sq": 1.9718275723334955e-07, + "sw": 7.754325451969635e-06, + "gl": 7.02019315212965e-05, + "mr": 1.7851471056928858e-06, + "pa": 4.558532509690849e-06, + "si": 6.0046888393117115e-05, + "km": 0.0015730170998722315, + "sn": 0.00039155493141151965, + "yo": 0.00011574340896913782, + "so": 1.5968348066053295e-07, + "af": 7.876437848608475e-06, + "oc": 0.000635556410998106, + "ka": 1.0150099427619352e-07, + "be": 1.0765814295154996e-05, + "tg": 1.0071110523313109e-07, + "sd": 1.2199266166135203e-05, + "gu": 1.4092016442646127e-07, + "am": 8.565230018575676e-07, + "yi": 1.3823579138261266e-05, + "lo": 7.876437848608475e-06, + "uz": 3.974836104703172e-08, + "fo": 2.3514701751992106e-05, + "ht": 0.00040398421697318554, + "ps": 3.2833879686222645e-06, + "tk": 1.9718275723334955e-07, + "nn": 0.07120019197463989, + "mt": 1.1707280691553024e-06, + "sa": 0.0002955611562356353, + "lb": 6.670607035630383e-07, + "my": 2.792439772747457e-05, + "bo": 7.5157495302846655e-06, + "tl": 7.243038271553814e-05, + "mg": 5.530127964448184e-07, + "as": 1.0659599638529471e-06, + "tt": 2.5068976228226347e-08, + "haw": 0.01446519698947668, + "ln": 2.9725370040978305e-05, + "ha": 3.407012627576478e-07, + "ba": 2.0664577959905728e-07, + "jw": 0.006832874845713377, + "su": 3.407012627576478e-07 + } +} \ No newline at end of file 
diff --git a/tests/expected/medium_auto/empty.mp3.words.json b/tests/expected/medium_auto/empty.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..a64d866b5d0e34bf727524d05d0daeaffcc09622 --- /dev/null +++ b/tests/expected/medium_auto/empty.mp3.words.json @@ -0,0 +1,147 @@ +{ + "text": " Thanks for watching!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 2.52, + "text": " Thanks for watching!", + "tokens": [ + 50364, + 2561, + 337, + 1976, + 0, + 50518 + ], + "temperature": 0.0, + "avg_logprob": -0.8082353728158134, + "compression_ratio": 0.7142857142857143, + "no_speech_prob": 0.4414949119091034, + "confidence": 0.369, + "words": [ + { + "text": "Thanks", + "start": 0.0, + "end": 1.5, + "confidence": 0.056 + }, + { + "text": "for", + "start": 1.5, + "end": 2.46, + "confidence": 0.948 + }, + { + "text": "watching!", + "start": 2.46, + "end": 2.52, + "confidence": 0.948 + } + ] + } + ], + "language": "en", + "language_probs": { + "en": 0.47342103719711304, + "zh": 0.014982158318161964, + "de": 0.007772641722112894, + "es": 0.009980270639061928, + "ru": 0.07260146737098694, + "ko": 0.08099278807640076, + "fr": 0.008019371889531612, + "ja": 0.02843114361166954, + "pt": 0.012038503773510456, + "tr": 0.008671008981764317, + "pl": 0.005018395371735096, + "ca": 8.500757394358516e-05, + "nl": 0.00318988598883152, + "ar": 0.005776137113571167, + "sv": 0.0022975888568907976, + "it": 0.0030917434487491846, + "id": 0.0014834346948191524, + "hi": 0.0032911438029259443, + "fi": 0.0010037421016022563, + "vi": 0.0018752370961010456, + "he": 0.0002384096587775275, + "uk": 0.0007459177868440747, + "el": 0.0014155033277347684, + "ms": 0.0035034040920436382, + "cs": 0.0009139176108874381, + "ro": 0.0007229683105833828, + "da": 0.00015154240827541798, + "hu": 0.0007817152072675526, + "ta": 0.0012688488932326436, + "no": 0.0002744078228715807, + "th": 0.0022619678638875484, + "ur": 0.0007576643256470561, + "hr": 
6.938115984667093e-05, + "bg": 0.00011619159340625629, + "lt": 1.3450153346639127e-05, + "la": 0.010961182415485382, + "mi": 0.0003309990279376507, + "ml": 0.0007343534380197525, + "cy": 0.004940592218190432, + "sk": 6.938115984667093e-05, + "te": 0.0004055484605487436, + "fa": 0.0001638563844608143, + "lv": 4.7214471123879775e-06, + "bn": 0.00011261674080742523, + "sr": 1.0210846994596068e-06, + "az": 9.743258715388947e-07, + "sl": 6.219287752173841e-05, + "kn": 1.6316886330969282e-06, + "et": 5.025954578741221e-06, + "mk": 1.7197871216012572e-07, + "br": 0.00019156753842253238, + "eu": 3.6770668430108344e-06, + "is": 2.0832003428949974e-05, + "hy": 3.91421735912445e-06, + "ne": 8.286398951895535e-06, + "mn": 6.027939889463596e-05, + "bs": 8.286398951895535e-06, + "kk": 1.175261218122614e-06, + "sq": 1.3317454659045325e-06, + "sw": 5.155970575287938e-05, + "gl": 5.4885022109374404e-05, + "mr": 1.5328296285588294e-06, + "pa": 2.1281230146996677e-06, + "si": 0.00022049288963899016, + "km": 0.0006380173726938665, + "sn": 0.0003523466584738344, + "yo": 3.713705882546492e-05, + "so": 7.812485591784935e-08, + "af": 1.115056875278242e-05, + "oc": 8.031453035073355e-06, + "ka": 3.4202051324427885e-07, + "be": 5.434352715383284e-06, + "tg": 4.348302695689199e-08, + "sd": 2.1616360754705966e-06, + "gu": 1.4710117568483838e-07, + "am": 4.5310360974326613e-07, + "yi": 2.337286105102976e-06, + "lo": 9.995304026233498e-06, + "uz": 9.702374903497457e-09, + "fo": 7.784351510053966e-06, + "ht": 9.840341590461321e-06, + "ps": 4.795799213752616e-06, + "tk": 8.316347788195344e-08, + "nn": 0.213387593626976, + "mt": 4.2565142166495207e-07, + "sa": 6.938115984667093e-05, + "lb": 4.084852989194587e-08, + "my": 7.3127202995237894e-06, + "bo": 8.286398951895535e-06, + "tl": 0.0013091264991089702, + "mg": 1.653903041187732e-07, + "as": 3.3671796018097666e-07, + "tt": 1.6633739718940888e-08, + "haw": 0.0035034040920436382, + "ln": 3.0658520699944347e-07, + "ha": 1.2680915517648828e-07, + 
"ba": 7.997752504707023e-08, + "jw": 0.004940592218190432, + "su": 3.4202051324427885e-07 + } +} \ No newline at end of file diff --git a/tests/expected/medium_auto/gaenswein15.mp3.words.json b/tests/expected/medium_auto/gaenswein15.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..aa5d04fa2fc90249d6893d9ad39dd96192ddfa88 --- /dev/null +++ b/tests/expected/medium_auto/gaenswein15.mp3.words.json @@ -0,0 +1,417 @@ +{ + "text": " Die Wiederzulassung des Messbuchs von 1962 als Missale für die außerordentliche Form des römischen Rethus ist dann nicht so weitergegangen, wie sich Papst Benediktas gewünscht hatte. Das hat er als Emeritor so gemacht.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.84, + "end": 8.56, + "text": " Die Wiederzulassung des Messbuchs von 1962 als Missale für die außerordentliche Form des römischen Rethus", + "tokens": [ + 50364, + 3229, + 45742, + 89, + 425, + 40828, + 730, + 9847, + 65, + 37503, + 2957, + 39498, + 3907, + 5275, + 1220, + 2959, + 978, + 39428, + 765, + 7698, + 68, + 10126, + 730, + 367, + 32374, + 6282, + 497, + 3293, + 301, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.2819720928485577, + "compression_ratio": 1.247191011235955, + "no_speech_prob": 0.2541808485984802, + "confidence": 0.869, + "words": [ + { + "text": "Die", + "start": 0.84, + "end": 1.12, + "confidence": 0.872 + }, + { + "text": "Wiederzulassung", + "start": 1.12, + "end": 1.92, + "confidence": 0.976 + }, + { + "text": "des", + "start": 1.92, + "end": 2.16, + "confidence": 0.986 + }, + { + "text": "Messbuchs", + "start": 2.16, + "end": 2.74, + "confidence": 0.947 + }, + { + "text": "von", + "start": 2.74, + "end": 3.28, + "confidence": 0.971 + }, + { + "text": "1962", + "start": 3.28, + "end": 4.9, + "confidence": 0.98 + }, + { + "text": "als", + "start": 4.9, + "end": 5.26, + "confidence": 0.925 + }, + { + "text": "Missale", + "start": 5.26, + "end": 5.76, + "confidence": 0.905 + }, + { + "text": "für", + 
"start": 5.76, + "end": 5.96, + "confidence": 0.956 + }, + { + "text": "die", + "start": 5.96, + "end": 6.12, + "confidence": 0.99 + }, + { + "text": "außerordentliche", + "start": 6.12, + "end": 7.06, + "confidence": 0.942 + }, + { + "text": "Form", + "start": 7.06, + "end": 7.32, + "confidence": 0.834 + }, + { + "text": "des", + "start": 7.32, + "end": 7.64, + "confidence": 0.863 + }, + { + "text": "römischen", + "start": 7.64, + "end": 8.04, + "confidence": 0.675 + }, + { + "text": "Rethus", + "start": 8.04, + "end": 8.56, + "confidence": 0.63 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 9.44, + "end": 12.74, + "text": " ist dann nicht so weitergegangen, wie sich Papst Benediktas gewünscht hatte.", + "tokens": [ + 50814, + 1418, + 3594, + 1979, + 370, + 8988, + 432, + 47152, + 11, + 3355, + 3041, + 15919, + 372, + 39753, + 9874, + 296, + 6906, + 3412, + 82, + 4701, + 13299, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.2819720928485577, + "compression_ratio": 1.247191011235955, + "no_speech_prob": 0.2541808485984802, + "confidence": 0.889, + "words": [ + { + "text": "ist", + "start": 9.44, + "end": 9.66, + "confidence": 0.975 + }, + { + "text": "dann", + "start": 9.66, + "end": 9.82, + "confidence": 0.685 + }, + { + "text": "nicht", + "start": 9.82, + "end": 10.0, + "confidence": 0.996 + }, + { + "text": "so", + "start": 10.0, + "end": 10.16, + "confidence": 0.992 + }, + { + "text": "weitergegangen,", + "start": 10.16, + "end": 10.84, + "confidence": 0.811 + }, + { + "text": "wie", + "start": 10.9, + "end": 11.06, + "confidence": 0.988 + }, + { + "text": "sich", + "start": 11.06, + "end": 11.26, + "confidence": 0.964 + }, + { + "text": "Papst", + "start": 11.26, + "end": 11.56, + "confidence": 0.885 + }, + { + "text": "Benediktas", + "start": 11.56, + "end": 12.08, + "confidence": 0.764 + }, + { + "text": "gewünscht", + "start": 12.08, + "end": 12.56, + "confidence": 0.993 + }, + { + "text": "hatte.", + "start": 12.56, + "end": 12.74, + 
"confidence": 0.952 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 14.0, + "end": 15.48, + "text": " Das hat er als Emeritor so gemacht.", + "tokens": [ + 51014, + 2846, + 2385, + 1189, + 3907, + 18477, + 3029, + 370, + 12293, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.2819720928485577, + "compression_ratio": 1.247191011235955, + "no_speech_prob": 0.2541808485984802, + "confidence": 0.441, + "words": [ + { + "text": "Das", + "start": 14.0, + "end": 14.2, + "confidence": 0.967 + }, + { + "text": "hat", + "start": 14.2, + "end": 14.36, + "confidence": 0.845 + }, + { + "text": "er", + "start": 14.36, + "end": 14.5, + "confidence": 0.99 + }, + { + "text": "als", + "start": 14.5, + "end": 14.7, + "confidence": 0.977 + }, + { + "text": "Emeritor", + "start": 14.7, + "end": 15.24, + "confidence": 0.427 + }, + { + "text": "so", + "start": 15.24, + "end": 15.42, + "confidence": 0.111 + }, + { + "text": "gemacht.", + "start": 15.42, + "end": 15.48, + "confidence": 0.09 + } + ] + } + ], + "language": "de", + "language_probs": { + "en": 0.006050276570022106, + "zh": 0.0003577142197173089, + "de": 0.9861949682235718, + "es": 0.00043148567783646286, + "ru": 0.0005806274712085724, + "ko": 0.0005716257146559656, + "fr": 0.000818815955426544, + "ja": 0.0003577142197173089, + "pt": 0.0006788222817704082, + "tr": 0.0003577142197173089, + "pl": 0.0007572810281999409, + "ca": 1.290118916585925e-06, + "nl": 0.0009876806288957596, + "ar": 0.00017433597531635314, + "sv": 5.316946771927178e-05, + "it": 0.0005897710216231644, + "id": 1.8958158761961386e-05, + "hi": 5.23451562912669e-05, + "fi": 1.1144927157147322e-05, + "vi": 6.119767931522802e-05, + "he": 6.65493917040294e-06, + "uk": 7.90293597674463e-06, + "el": 4.013441866845824e-05, + "ms": 1.780953971319832e-05, + "cs": 9.99024723569164e-06, + "ro": 1.673051701800432e-05, + "da": 4.945531145494897e-06, + "hu": 4.477318361750804e-05, + "ta": 1.7633834659136483e-06, + "no": 2.9531115615100134e-06, + "th": 
4.140842429478653e-05, + "ur": 2.6320798497181386e-05, + "hr": 2.313055773583983e-07, + "bg": 2.424061449346482e-07, + "lt": 1.8585924976832757e-07, + "la": 0.0002169646177208051, + "mi": 3.5069065233983565e-06, + "ml": 3.6182279927743366e-06, + "cy": 4.076644472661428e-05, + "sk": 3.5825306099468435e-07, + "te": 3.9346403468698554e-07, + "fa": 3.0948342555348063e-06, + "lv": 4.346102855379286e-08, + "bn": 5.131738589625456e-07, + "sr": 5.525449875420918e-09, + "az": 2.2902353791209862e-08, + "sl": 1.020567992782162e-06, + "kn": 8.829642794694337e-09, + "et": 6.891006165687941e-08, + "mk": 1.534361970811915e-09, + "br": 2.8178787943033967e-06, + "eu": 2.2071331784445647e-07, + "is": 1.381188639015818e-07, + "hy": 1.4474733234237647e-07, + "ne": 1.257586603742311e-07, + "mn": 3.161572408316715e-07, + "bs": 4.810697618040649e-08, + "kk": 7.320031247814995e-09, + "sq": 1.197477228487287e-08, + "sw": 2.621035548600048e-07, + "gl": 2.0940697140758857e-06, + "mr": 2.0529535404989474e-08, + "pa": 3.775959456220335e-08, + "si": 6.589282293134602e-07, + "km": 2.3965356376720592e-05, + "sn": 3.6182279927743366e-06, + "yo": 2.2771951080358122e-07, + "so": 3.266845127747331e-10, + "af": 1.0425758745213898e-07, + "oc": 4.6725008928660827e-07, + "ka": 1.3435321744026396e-09, + "be": 2.746821508026187e-07, + "tg": 2.0603733996704676e-10, + "sd": 1.869235610740816e-08, + "gu": 1.0463440114349964e-09, + "am": 1.924520987017786e-09, + "yi": 2.7042361239182355e-07, + "lo": 4.449167789744024e-08, + "uz": 1.629889556653552e-10, + "fo": 7.869775942026536e-08, + "ht": 1.3597754389138572e-07, + "ps": 6.945052888340797e-08, + "tk": 9.452930660458492e-10, + "nn": 0.0003206529363524169, + "mt": 1.4190527641844142e-09, + "sa": 3.3463138606748544e-06, + "lb": 7.898218190582895e-10, + "my": 1.238089453181601e-07, + "bo": 6.784170381024524e-08, + "tl": 3.046853180421749e-06, + "mg": 1.0463440114349964e-09, + "as": 1.6851668949158238e-09, + "tt": 3.1106988512830114e-11, + "haw": 
7.195705165941035e-06, + "ln": 3.7387062334914845e-09, + "ha": 3.8193290197163776e-10, + "ba": 9.306375114981336e-10, + "jw": 2.982536898343824e-05, + "su": 2.2104579278092729e-10 + } +} \ No newline at end of file diff --git a/tests/expected/medium_auto/gloria.mp3.words.json b/tests/expected/medium_auto/gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b06d3f10ff3991fd466724f251a4ba8cb7383949 --- /dev/null +++ b/tests/expected/medium_auto/gloria.mp3.words.json @@ -0,0 +1,639 @@ +{ + "text": " Hello. How are you? Love. How are you? I'm okay. I will be. I said she could stay with us tomorrow, she feels better. Of course she can. This won't be for long. Well, you can stay as long as you want, my love. I really miss you.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.4, + "end": 1.74, + "text": " Hello.", + "tokens": [ + 50364, + 2425, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.285, + "words": [ + { + "text": "Hello.", + "start": 1.4, + "end": 1.74, + "confidence": 0.285 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 2.32, + "end": 3.5, + "text": " How are you?", + "tokens": [ + 50464, + 1012, + 366, + 291, + 30, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.784, + "words": [ + { + "text": "How", + "start": 2.32, + "end": 2.82, + "confidence": 0.548 + }, + { + "text": "are", + "start": 2.82, + "end": 3.24, + "confidence": 0.989 + }, + { + "text": "you?", + "start": 3.24, + "end": 3.5, + "confidence": 0.89 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 3.5, + "end": 4.26, + "text": " Love.", + "tokens": [ + 50564, + 5956, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, 
+ "no_speech_prob": 0.05056881532073021, + "confidence": 0.243, + "words": [ + { + "text": "Love.", + "start": 3.5, + "end": 4.26, + "confidence": 0.243 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 5.5, + "end": 6.38, + "text": " How are you?", + "tokens": [ + 50664, + 1012, + 366, + 291, + 30, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.927, + "words": [ + { + "text": "How", + "start": 5.5, + "end": 5.68, + "confidence": 0.851 + }, + { + "text": "are", + "start": 5.68, + "end": 6.08, + "confidence": 0.998 + }, + { + "text": "you?", + "start": 6.08, + "end": 6.38, + "confidence": 0.937 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 7.5, + "end": 9.18, + "text": " I'm okay. I will be.", + "tokens": [ + 50764, + 286, + 478, + 1392, + 13, + 286, + 486, + 312, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.769, + "words": [ + { + "text": "I'm", + "start": 7.5, + "end": 7.52, + "confidence": 0.766 + }, + { + "text": "okay.", + "start": 7.52, + "end": 8.22, + "confidence": 0.488 + }, + { + "text": "I", + "start": 8.42, + "end": 8.64, + "confidence": 0.765 + }, + { + "text": "will", + "start": 8.64, + "end": 8.94, + "confidence": 0.949 + }, + { + "text": "be.", + "start": 8.94, + "end": 9.18, + "confidence": 0.997 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.5, + "end": 11.52, + "text": " I said she could stay with us tomorrow, she feels better.", + "tokens": [ + 50864, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 11, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.786, + "words": [ + { + "text": "I", + "start": 9.5, + 
"end": 9.52, + "confidence": 0.884 + }, + { + "text": "said", + "start": 9.52, + "end": 9.64, + "confidence": 0.858 + }, + { + "text": "she", + "start": 9.64, + "end": 9.78, + "confidence": 0.928 + }, + { + "text": "could", + "start": 9.78, + "end": 9.96, + "confidence": 0.91 + }, + { + "text": "stay", + "start": 9.96, + "end": 10.16, + "confidence": 0.994 + }, + { + "text": "with", + "start": 10.16, + "end": 10.28, + "confidence": 0.977 + }, + { + "text": "us", + "start": 10.28, + "end": 10.46, + "confidence": 0.993 + }, + { + "text": "tomorrow,", + "start": 10.46, + "end": 10.66, + "confidence": 0.512 + }, + { + "text": "she", + "start": 10.72, + "end": 10.86, + "confidence": 0.278 + }, + { + "text": "feels", + "start": 10.86, + "end": 11.16, + "confidence": 0.81 + }, + { + "text": "better.", + "start": 11.16, + "end": 11.52, + "confidence": 0.992 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 12.06, + "end": 13.36, + "text": " Of course she can.", + "tokens": [ + 50964, + 2720, + 1164, + 750, + 393, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.928, + "words": [ + { + "text": "Of", + "start": 12.06, + "end": 12.34, + "confidence": 0.788 + }, + { + "text": "course", + "start": 12.34, + "end": 12.56, + "confidence": 0.989 + }, + { + "text": "she", + "start": 12.56, + "end": 12.88, + "confidence": 0.959 + }, + { + "text": "can.", + "start": 12.88, + "end": 13.36, + "confidence": 0.993 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.5, + "end": 15.26, + "text": " This won't be for long.", + "tokens": [ + 51064, + 639, + 1582, + 380, + 312, + 337, + 938, + 13, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.892, + "words": [ + { + "text": "This", + "start": 13.5, + "end": 14.28, + "confidence": 
0.545 + }, + { + "text": "won't", + "start": 14.28, + "end": 14.6, + "confidence": 0.975 + }, + { + "text": "be", + "start": 14.6, + "end": 14.8, + "confidence": 0.996 + }, + { + "text": "for", + "start": 14.8, + "end": 14.96, + "confidence": 0.983 + }, + { + "text": "long.", + "start": 14.96, + "end": 15.26, + "confidence": 0.996 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.5, + "end": 17.62, + "text": " Well, you can stay as long as you want, my love.", + "tokens": [ + 51164, + 1042, + 11, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.927, + "words": [ + { + "text": "Well,", + "start": 15.5, + "end": 15.56, + "confidence": 0.674 + }, + { + "text": "you", + "start": 15.68, + "end": 15.7, + "confidence": 0.744 + }, + { + "text": "can", + "start": 15.7, + "end": 15.94, + "confidence": 0.982 + }, + { + "text": "stay", + "start": 15.94, + "end": 16.14, + "confidence": 0.985 + }, + { + "text": "as", + "start": 16.14, + "end": 16.28, + "confidence": 0.972 + }, + { + "text": "long", + "start": 16.28, + "end": 16.4, + "confidence": 0.994 + }, + { + "text": "as", + "start": 16.4, + "end": 16.48, + "confidence": 0.996 + }, + { + "text": "you", + "start": 16.48, + "end": 16.62, + "confidence": 0.995 + }, + { + "text": "want,", + "start": 16.62, + "end": 16.8, + "confidence": 0.995 + }, + { + "text": "my", + "start": 16.88, + "end": 17.22, + "confidence": 0.949 + }, + { + "text": "love.", + "start": 17.22, + "end": 17.62, + "confidence": 0.993 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 17.76, + "end": 19.26, + "text": " I really miss you.", + "tokens": [ + 51264, + 286, + 534, + 1713, + 291, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.23906808740952434, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 
0.05056881532073021, + "confidence": 0.792, + "words": [ + { + "text": "I", + "start": 17.76, + "end": 17.98, + "confidence": 0.587 + }, + { + "text": "really", + "start": 17.98, + "end": 18.36, + "confidence": 0.943 + }, + { + "text": "miss", + "start": 18.36, + "end": 18.78, + "confidence": 0.903 + }, + { + "text": "you.", + "start": 18.78, + "end": 19.26, + "confidence": 0.787 + } + ] + } + ], + "language": "en", + "language_probs": { + "en": 0.7641609907150269, + "zh": 0.0024704576935619116, + "de": 0.00040964208892546594, + "es": 0.0006444574682973325, + "ru": 0.0002950541384052485, + "ko": 0.00039088321500457823, + "fr": 0.0005512336501851678, + "ja": 0.00021586621005553752, + "pt": 0.0011310579720884562, + "tr": 0.0011135225649923086, + "pl": 0.00034495320869609714, + "ca": 8.453441841993481e-05, + "nl": 0.0008672122494317591, + "ar": 0.0006968246307224035, + "sv": 0.00031408347422257066, + "it": 0.00036150787491351366, + "id": 1.7174210370285437e-05, + "hi": 3.7511923437705263e-05, + "fi": 2.0394880266394466e-05, + "vi": 0.00011554484808584675, + "he": 7.386535799014382e-06, + "uk": 2.3474360205000266e-05, + "el": 0.00016551035514567047, + "ms": 0.00035590320476330817, + "cs": 1.108850210584933e-05, + "ro": 6.028714324202156e-06, + "da": 3.993123391410336e-05, + "hu": 1.6133675671881065e-05, + "ta": 6.518594091176055e-06, + "no": 2.7018824766855687e-05, + "th": 9.729852172313258e-05, + "ur": 4.385588181321509e-05, + "hr": 7.313661285479611e-07, + "bg": 1.32433274302457e-06, + "lt": 3.914720423381368e-07, + "la": 0.0018941658781841397, + "mi": 0.00033434011857025325, + "ml": 1.0255188499286305e-05, + "cy": 0.2189358025789261, + "sk": 1.3741961879532028e-07, + "te": 3.7726733808085555e-06, + "fa": 2.3474360205000266e-05, + "lv": 3.195101498931763e-07, + "bn": 2.183455762860831e-06, + "sr": 2.1073979894481454e-08, + "az": 2.7759523391068797e-07, + "sl": 1.4689880117657594e-05, + "kn": 2.705952439896464e-08, + "et": 1.1939221167267533e-07, + "mk": 
1.6157979843001158e-08, + "br": 1.771937786543276e-05, + "eu": 1.0476335319253849e-06, + "is": 1.6907948520383798e-05, + "hy": 4.8238820937740456e-08, + "ne": 3.096798764090636e-07, + "mn": 1.2636871815630002e-06, + "bs": 2.592917780930293e-06, + "kk": 3.163578909948228e-08, + "sq": 5.520640229406126e-07, + "sw": 3.2777552405605093e-06, + "gl": 2.2752130462322384e-05, + "mr": 7.018707037786953e-08, + "pa": 4.299479030578368e-07, + "si": 1.3451880249704118e-06, + "km": 7.118511712178588e-05, + "sn": 9.78556909103645e-06, + "yo": 8.771728062129114e-06, + "so": 4.889556315390564e-09, + "af": 9.0501716840663e-06, + "oc": 2.4741793822613545e-06, + "ka": 3.633610079489813e-09, + "be": 1.1152000070069334e-06, + "tg": 6.180978395775583e-09, + "sd": 6.003418207001232e-08, + "gu": 4.889556315390564e-09, + "am": 2.278640565123169e-08, + "yi": 7.200272875707014e-07, + "lo": 5.186161615711171e-07, + "uz": 6.934865459662376e-10, + "fo": 1.5974502503013355e-06, + "ht": 2.7601463443716057e-06, + "ps": 1.5974502503013355e-06, + "tk": 4.417346044505166e-09, + "nn": 0.0033243645448237658, + "mt": 3.401167703032115e-07, + "sa": 3.5440982628642814e-06, + "lb": 8.188465905334397e-09, + "my": 1.648158786338172e-06, + "bo": 2.252765852972516e-06, + "tl": 5.994387902319431e-05, + "mg": 1.6933418223175067e-08, + "as": 3.64125725127451e-08, + "tt": 4.2481200779320716e-09, + "haw": 8.998641715152189e-05, + "ln": 5.2158600993834625e-08, + "ha": 2.0747258133724245e-08, + "ba": 6.085151049717297e-09, + "jw": 7.460136112058535e-05, + "su": 2.3879946908778038e-08 + } +} \ No newline at end of file diff --git a/tests/expected/medium_auto/laugh1.mp3.words.json b/tests/expected/medium_auto/laugh1.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..7c2c1ff54baa3c535ecd26a331721abe9193f580 --- /dev/null +++ b/tests/expected/medium_auto/laugh1.mp3.words.json @@ -0,0 +1,133 @@ +{ + "text": " hahahaha", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.32, + "end": 
1.58, + "text": " hahahaha", + "tokens": [ + 50364, + 17206, + 15380, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -1.2676313400268555, + "compression_ratio": 0.6666666666666666, + "no_speech_prob": 0.5892603397369385, + "confidence": 0.135, + "words": [ + { + "text": "hahahaha", + "start": 0.32, + "end": 1.58, + "confidence": 0.135 + } + ] + } + ], + "language": "en", + "language_probs": { + "en": 0.4557611346244812, + "zh": 0.02531365118920803, + "de": 0.0465589314699173, + "es": 0.07324747741222382, + "ru": 0.03406323865056038, + "ko": 0.01233688835054636, + "fr": 0.037411149591207504, + "ja": 0.0108872652053833, + "pt": 0.05978284776210785, + "tr": 0.016090335324406624, + "pl": 0.022339219227433205, + "ca": 0.0015202028444036841, + "nl": 0.008612535893917084, + "ar": 0.030060704797506332, + "sv": 0.01660109870135784, + "it": 0.011409756727516651, + "id": 0.001280139316804707, + "hi": 0.009912967681884766, + "fi": 0.003821789985522628, + "vi": 0.004682554863393307, + "he": 0.0012215173337608576, + "uk": 0.005919303745031357, + "el": 0.0043306550942361355, + "ms": 0.006400293670594692, + "cs": 0.009912967681884766, + "ro": 0.011058715172111988, + "da": 0.0026680391747504473, + "hu": 0.0022118822671473026, + "ta": 0.00040281369001604617, + "no": 0.0018919231370091438, + "th": 0.0018625915981829166, + "ur": 0.006400293670594692, + "hr": 0.0008527565514668822, + "bg": 0.0015202028444036841, + "lt": 0.0001932721061166376, + "la": 0.006400293670594692, + "mi": 0.0027100546285510063, + "ml": 0.0003610798448789865, + "cy": 0.011409756727516651, + "sk": 0.0009365698206238449, + "te": 0.001777297118678689, + "fa": 0.003821789985522628, + "lv": 0.0001679177221376449, + "bn": 0.001618247595615685, + "sr": 6.473804387496784e-05, + "az": 2.308306284248829e-05, + "sl": 0.0014280985342338681, + "kn": 2.656844117154833e-05, + "et": 0.00013283385487738997, + "mk": 4.662906576413661e-05, + "br": 0.0002812092425301671, + "eu": 0.0001902756921481341, + "is": 
0.00027684951783157885, + "hy": 0.00018442152941133827, + "ne": 0.00011907148291356862, + "mn": 0.00012094659177819267, + "bs": 0.0003339442773722112, + "kk": 5.20184839842841e-05, + "sq": 0.00022595797781832516, + "sw": 0.0022118822671473026, + "gl": 0.0008137059630826116, + "mr": 4.963638639310375e-05, + "pa": 0.0001101231318898499, + "si": 0.0010612726910039783, + "km": 0.0010448191314935684, + "sn": 0.0018919231370091438, + "yo": 0.0003040597075596452, + "so": 1.1606892257987056e-05, + "af": 0.00010026824747910723, + "oc": 8.987988258013502e-05, + "ka": 3.1061652407515794e-05, + "be": 0.0001732480013743043, + "tg": 5.314022928359918e-06, + "sd": 6.177347677294165e-05, + "gu": 1.3359451259020716e-05, + "am": 1.9136530681862496e-05, + "yi": 7.451303099514917e-05, + "lo": 0.0001528908178443089, + "uz": 3.093131795139925e-07, + "fo": 5.713112841476686e-05, + "ht": 8.987988258013502e-05, + "ps": 0.00048588606296107173, + "tk": 2.250098987133242e-06, + "nn": 0.012928948737680912, + "mt": 1.6626105207251385e-05, + "sa": 0.0003499705926515162, + "lb": 1.2820676147384802e-06, + "my": 0.0001528908178443089, + "bo": 0.00016531439905520529, + "tl": 0.0021438298281282187, + "mg": 5.569047516473802e-06, + "as": 1.8260203432873823e-05, + "tt": 1.4988888779043918e-06, + "haw": 0.0009662997908890247, + "ln": 1.0903666407102719e-05, + "ha": 5.482707820192445e-06, + "ba": 2.250098987133242e-06, + "jw": 0.003119254019111395, + "su": 4.838472705159802e-06 + } +} \ No newline at end of file diff --git a/tests/expected/medium_auto/laugh2.mp3.words.json b/tests/expected/medium_auto/laugh2.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..2d10eb93d779cfe2b714ea7dd331ecf25259d545 --- /dev/null +++ b/tests/expected/medium_auto/laugh2.mp3.words.json @@ -0,0 +1,133 @@ +{ + "text": " Hehehe", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.22, + "end": 0.64, + "text": " Hehehe", + "tokens": [ + 50364, + 634, + 23500, + 50464 + ], + "temperature": 
0.0, + "avg_logprob": -1.0083972930908203, + "compression_ratio": 0.5, + "no_speech_prob": 0.3666148781776428, + "confidence": 0.334, + "words": [ + { + "text": "Hehehe", + "start": 0.22, + "end": 0.64, + "confidence": 0.334 + } + ] + } + ], + "language": "en", + "language_probs": { + "en": 0.6813932657241821, + "zh": 0.020576294511556625, + "de": 0.017058348283171654, + "es": 0.016793882474303246, + "ru": 0.02368316985666752, + "ko": 0.09514381736516953, + "fr": 0.014590777456760406, + "ja": 0.04092084616422653, + "pt": 0.016533516347408295, + "tr": 0.002575425198301673, + "pl": 0.0035201888531446457, + "ca": 0.00028895470313727856, + "nl": 0.007688799407333136, + "ar": 0.0034118841867893934, + "sv": 0.0032556424848735332, + "it": 0.003631931496784091, + "id": 0.004887297749519348, + "hi": 0.004115517716854811, + "fi": 0.0030583932530134916, + "vi": 0.002657177858054638, + "he": 0.0002510481863282621, + "uk": 0.0007854602881707251, + "el": 0.0005398384528234601, + "ms": 0.003806231776252389, + "cs": 0.0009929147781804204, + "ro": 0.0006718398653902113, + "da": 0.00031735465745441616, + "hu": 0.0006117171142250299, + "ta": 0.0010085509857162833, + "no": 0.00040117386379279196, + "th": 0.00520250154659152, + "ur": 0.00040749143227003515, + "hr": 0.00012045423500239849, + "bg": 8.812620944809169e-05, + "lt": 2.263267924718093e-05, + "la": 0.0005151174264028668, + "mi": 0.0007854602881707251, + "ml": 0.0007040820200927556, + "cy": 0.0016628194134682417, + "sk": 6.347492308123037e-05, + "te": 0.0003596099268179387, + "fa": 5.429297380032949e-05, + "lv": 1.4612756785936654e-05, + "bn": 5.779457205790095e-05, + "sr": 1.9469614471745444e-06, + "az": 7.1624708652962e-07, + "sl": 0.00010630048927851021, + "kn": 2.138318905053893e-06, + "et": 1.735307705530431e-05, + "mk": 7.624410045536933e-07, + "br": 0.00010630048927851021, + "eu": 1.2695783880189992e-05, + "is": 3.241981903556734e-05, + "hy": 8.326097486133222e-06, + "ne": 2.0607289116014726e-05, + "mn": 
2.2989092030911706e-05, + "bs": 1.0859279427677393e-05, + "kk": 3.5810101053357357e-06, + "sq": 2.3854670416767476e-06, + "sw": 0.00012623495422303677, + "gl": 8.951398922363296e-05, + "mr": 5.81255289944238e-06, + "pa": 2.7456558200356085e-06, + "si": 0.00014758360339328647, + "km": 0.0007732828380540013, + "sn": 0.00042042657150886953, + "yo": 2.3351120034931228e-05, + "so": 1.988946252140522e-07, + "af": 1.1203989743080456e-05, + "oc": 9.434705134481192e-06, + "ka": 5.000210308025999e-07, + "be": 1.507661636424018e-05, + "tg": 5.7431197575397164e-08, + "sd": 2.7888936529052444e-06, + "gu": 6.031405064277351e-07, + "am": 1.1093451348642702e-06, + "yi": 4.25255484515219e-06, + "lo": 2.3718846932752058e-05, + "uz": 1.9539980300464777e-08, + "fo": 3.811965598288225e-06, + "ht": 7.121688668121351e-06, + "ps": 9.144429895968642e-06, + "tk": 1.701235845530391e-07, + "nn": 0.007688799407333136, + "mt": 4.5527434622272267e-07, + "sa": 3.790260961977765e-05, + "lb": 8.621444180789695e-08, + "my": 2.524857882235665e-05, + "bo": 1.4386207112693228e-05, + "tl": 0.002699022414162755, + "mg": 4.2105995134988916e-07, + "as": 7.389831466753094e-07, + "tt": 4.724161328795162e-08, + "haw": 0.0005314690642990172, + "ln": 4.697263022990228e-07, + "ha": 3.080540125210973e-07, + "ba": 2.0045457915784937e-07, + "jw": 0.0017426196718588471, + "su": 6.031405064277351e-07 + } +} \ No newline at end of file diff --git a/tests/expected/medium_auto/punctuations.mp3.words.json b/tests/expected/medium_auto/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..659195cefd4f52ec2216c1ff357d68304a280e89 --- /dev/null +++ b/tests/expected/medium_auto/punctuations.mp3.words.json @@ -0,0 +1,169 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 2.76, + "text": " Dis-moi, est-ce que l'avion vole ?", + "tokens": [ + 50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 
6, + 706, + 313, + 49877, + 2506, + 50496 + ], + "temperature": 0.0, + "avg_logprob": -0.26349667941822724, + "compression_ratio": 0.8095238095238095, + "no_speech_prob": 0.03940592333674431, + "confidence": 0.928, + "words": [ + { + "text": "Dis-moi,", + "start": 0.38, + "end": 1.1, + "confidence": 0.807 + }, + { + "text": "est-ce", + "start": 1.28, + "end": 1.5, + "confidence": 0.968 + }, + { + "text": "que", + "start": 1.5, + "end": 1.66, + "confidence": 0.978 + }, + { + "text": "l'avion", + "start": 1.66, + "end": 2.04, + "confidence": 0.993 + }, + { + "text": "vole ?", + "start": 2.04, + "end": 2.76, + "confidence": 0.898 + } + ] + } + ], + "language": "fr", + "language_probs": { + "en": 0.0032707557547837496, + "zh": 0.00046389183262363076, + "de": 0.0010785807389765978, + "es": 0.0011662238975986838, + "ru": 0.0009370873449370265, + "ko": 0.0007529708091169596, + "fr": 0.9805818200111389, + "ja": 0.0011303429491817951, + "pt": 0.0008141555008478463, + "tr": 0.0001845234219217673, + "pl": 0.0005864141858182847, + "ca": 1.4228917279979214e-05, + "nl": 0.0010785807389765978, + "ar": 0.00047119706869125366, + "sv": 9.134571882896125e-05, + "it": 0.0027542528696358204, + "id": 5.540397614822723e-05, + "hi": 4.8135811084648594e-05, + "fi": 4.96638058393728e-05, + "vi": 5.806286935694516e-05, + "he": 0.00010513827874092385, + "uk": 1.743363645800855e-05, + "el": 0.0002902960986830294, + "ms": 0.00015297529171220958, + "cs": 7.692080544074997e-05, + "ro": 0.00010847572411876172, + "da": 4.076653112861095e-06, + "hu": 7.003719656495377e-05, + "ta": 3.7702884583268315e-06, + "no": 5.234526270214701e-06, + "th": 4.96638058393728e-05, + "ur": 9.62776812230004e-06, + "hr": 9.33155206439551e-06, + "bg": 1.0410099093860481e-05, + "lt": 3.3990178849307995e-07, + "la": 0.0007298043346963823, + "mi": 1.770817289070692e-05, + "ml": 7.195720854724641e-07, + "cy": 0.00010847572411876172, + "sk": 2.9825432648067363e-06, + "te": 1.0634585123625584e-06, + "fa": 
1.1796187209256459e-05, + "lv": 2.6888443471762e-07, + "bn": 6.654953494944493e-07, + "sr": 7.069282759175621e-08, + "az": 1.484920915117982e-07, + "sl": 1.3366831808525603e-05, + "kn": 2.6210409842519766e-08, + "et": 6.866204671496234e-07, + "mk": 1.484920915117982e-07, + "br": 0.0005956488894298673, + "eu": 5.83953624300193e-06, + "is": 3.0948407925279753e-07, + "hy": 5.102530167278019e-07, + "ne": 1.3104380514050717e-07, + "mn": 8.545129617232305e-07, + "bs": 1.895819650599151e-06, + "kk": 9.006500789610072e-08, + "sq": 8.594062705924443e-08, + "sw": 6.314044185273815e-06, + "gl": 2.742695687629748e-05, + "mr": 1.3310744861882995e-07, + "pa": 5.636124456032121e-08, + "si": 2.182075377277215e-06, + "km": 0.00010847572411876172, + "sn": 5.7162687880918384e-05, + "yo": 4.477327820495702e-06, + "so": 6.225518323077495e-09, + "af": 3.4525447745181737e-07, + "oc": 9.424534073332325e-05, + "ka": 3.813592286405765e-08, + "be": 4.917382739222376e-06, + "tg": 6.945067632102564e-09, + "sd": 2.565712975410861e-07, + "gu": 3.016799965394057e-08, + "am": 6.141899433487197e-08, + "yi": 5.264501510282571e-07, + "lo": 9.239488463208545e-07, + "uz": 3.51958884259318e-09, + "fo": 1.936701892191195e-07, + "ht": 8.630276170151774e-06, + "ps": 5.965462150925305e-07, + "tk": 2.64159805141162e-08, + "nn": 0.0012221921933814883, + "mt": 3.667495818149291e-08, + "sa": 8.496474947605748e-06, + "lb": 2.2771999397264153e-08, + "my": 1.323495553151588e-06, + "bo": 5.264501510282571e-07, + "tl": 2.6170988348894753e-05, + "mg": 6.693063170359892e-08, + "as": 1.6660351320751943e-08, + "tt": 3.057871955647329e-09, + "haw": 5.454502024804242e-05, + "ln": 2.0498698631854495e-06, + "ha": 2.0412686652093726e-08, + "ba": 3.1125633626061244e-08, + "jw": 0.000372747570509091, + "su": 1.4029421180339341e-08 + } +} \ No newline at end of file diff --git a/tests/expected/medium_auto/radio_short.mp3.words.json b/tests/expected/medium_auto/radio_short.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..9c68416e5ae00e679c4ec3e52212bd64f2c44f8c --- /dev/null +++ b/tests/expected/medium_auto/radio_short.mp3.words.json @@ -0,0 +1,1632 @@ +{ + "text": "3212122222222211111111111111111111111111111111111111111111111", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.04, + "end": 0.7, + "text": "3", + "tokens": [ + 50364, + 18, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.029, + "words": [ + { + "text": "3", + "start": 0.04, + "end": 0.7, + "confidence": 0.029 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.96, + "end": 3.02, + "text": "2", + "tokens": [ + 50464, + 17, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.689, + "words": [ + { + "text": "2", + "start": 1.96, + "end": 3.02, + "confidence": 0.689 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 3.72, + "end": 4.92, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.95, + "words": [ + { + "text": "1", + "start": 3.72, + "end": 4.92, + "confidence": 0.95 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 5.5, + "end": 7.08, + "text": "2", + "tokens": [ + 50664, + 17, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.279, + "words": [ + { + "text": "2", + "start": 5.5, + "end": 7.08, + "confidence": 0.279 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 8.46, + "end": 9.34, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.702, 
+ "words": [ + { + "text": "1", + "start": 8.46, + "end": 9.34, + "confidence": 0.702 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.78, + "end": 11.34, + "text": "2", + "tokens": [ + 50864, + 17, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.919, + "words": [ + { + "text": "2", + "start": 9.78, + "end": 11.34, + "confidence": 0.919 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 11.8, + "end": 13.4, + "text": "2", + "tokens": [ + 50964, + 17, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.657, + "words": [ + { + "text": "2", + "start": 11.8, + "end": 13.4, + "confidence": 0.657 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.68, + "end": 15.54, + "text": "2", + "tokens": [ + 51064, + 17, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.771, + "words": [ + { + "text": "2", + "start": 13.68, + "end": 15.54, + "confidence": 0.771 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.54, + "end": 17.2, + "text": "2", + "tokens": [ + 51164, + 17, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.906, + "words": [ + { + "text": "2", + "start": 15.54, + "end": 17.2, + "confidence": 0.906 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 17.5, + "end": 19.24, + "text": "2", + "tokens": [ + 51264, + 17, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.809, + "words": [ + { + "text": "2", + "start": 17.5, + "end": 19.24, + "confidence": 0.809 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 19.64, + "end": 
20.8, + "text": "2", + "tokens": [ + 51364, + 17, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.629, + "words": [ + { + "text": "2", + "start": 19.64, + "end": 20.8, + "confidence": 0.629 + } + ] + }, + { + "id": 11, + "seek": 0, + "start": 22.06, + "end": 22.9, + "text": "2", + "tokens": [ + 51464, + 17, + 51564 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.835, + "words": [ + { + "text": "2", + "start": 22.06, + "end": 22.9, + "confidence": 0.835 + } + ] + }, + { + "id": 12, + "seek": 0, + "start": 23.56, + "end": 25.08, + "text": "2", + "tokens": [ + 51564, + 17, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.974, + "words": [ + { + "text": "2", + "start": 23.56, + "end": 25.08, + "confidence": 0.974 + } + ] + }, + { + "id": 13, + "seek": 0, + "start": 26.48, + "end": 27.08, + "text": "2", + "tokens": [ + 51664, + 17, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.2844874208623713, + "compression_ratio": 1.0, + "no_speech_prob": 0.8407736420631409, + "confidence": 0.97, + "words": [ + { + "text": "2", + "start": 26.48, + "end": 27.08, + "confidence": 0.97 + } + ] + }, + { + "id": 14, + "seek": 2800, + "start": 28.12, + "end": 29.62, + "text": "1", + "tokens": [ + 50364, + 16, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.316, + "words": [ + { + "text": "1", + "start": 28.12, + "end": 29.62, + "confidence": 0.316 + } + ] + }, + { + "id": 15, + "seek": 2800, + "start": 30.84, + "end": 31.16, + "text": "1", + "tokens": [ + 50464, + 16, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, 
+ "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.545, + "words": [ + { + "text": "1", + "start": 30.84, + "end": 31.16, + "confidence": 0.545 + } + ] + }, + { + "id": 16, + "seek": 2800, + "start": 32.06, + "end": 33.18, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.913, + "words": [ + { + "text": "1", + "start": 32.06, + "end": 33.18, + "confidence": 0.913 + } + ] + }, + { + "id": 17, + "seek": 2800, + "start": 33.5, + "end": 35.1, + "text": "1", + "tokens": [ + 50664, + 16, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.889, + "words": [ + { + "text": "1", + "start": 33.5, + "end": 35.1, + "confidence": 0.889 + } + ] + }, + { + "id": 18, + "seek": 2800, + "start": 36.18, + "end": 36.64, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 36.18, + "end": 36.64, + "confidence": 0.957 + } + ] + }, + { + "id": 19, + "seek": 2800, + "start": 37.5, + "end": 38.46, + "text": "1", + "tokens": [ + 50864, + 16, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.959, + "words": [ + { + "text": "1", + "start": 37.5, + "end": 38.46, + "confidence": 0.959 + } + ] + }, + { + "id": 20, + "seek": 2800, + "start": 39.5, + "end": 41.14, + "text": "1", + "tokens": [ + 50964, + 16, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + 
"no_speech_prob": 0.18289069831371307, + "confidence": 0.966, + "words": [ + { + "text": "1", + "start": 39.5, + "end": 41.14, + "confidence": 0.966 + } + ] + }, + { + "id": 21, + "seek": 2800, + "start": 41.5, + "end": 43.38, + "text": "1", + "tokens": [ + 51064, + 16, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.973, + "words": [ + { + "text": "1", + "start": 41.5, + "end": 43.38, + "confidence": 0.973 + } + ] + }, + { + "id": 22, + "seek": 2800, + "start": 43.88, + "end": 44.68, + "text": "1", + "tokens": [ + 51164, + 16, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.946, + "words": [ + { + "text": "1", + "start": 43.88, + "end": 44.68, + "confidence": 0.946 + } + ] + }, + { + "id": 23, + "seek": 2800, + "start": 45.5, + "end": 47.16, + "text": "1", + "tokens": [ + 51264, + 16, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.959, + "words": [ + { + "text": "1", + "start": 45.5, + "end": 47.16, + "confidence": 0.959 + } + ] + }, + { + "id": 24, + "seek": 2800, + "start": 47.94, + "end": 48.8, + "text": "1", + "tokens": [ + 51364, + 16, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.965, + "words": [ + { + "text": "1", + "start": 47.94, + "end": 48.8, + "confidence": 0.965 + } + ] + }, + { + "id": 25, + "seek": 2800, + "start": 50.22, + "end": 50.96, + "text": "1", + "tokens": [ + 51464, + 16, + 51564 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + 
"confidence": 0.967, + "words": [ + { + "text": "1", + "start": 50.22, + "end": 50.96, + "confidence": 0.967 + } + ] + }, + { + "id": 26, + "seek": 2800, + "start": 52.2, + "end": 52.84, + "text": "1", + "tokens": [ + 51564, + 16, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.963, + "words": [ + { + "text": "1", + "start": 52.2, + "end": 52.84, + "confidence": 0.963 + } + ] + }, + { + "id": 27, + "seek": 2800, + "start": 53.5, + "end": 54.78, + "text": "1", + "tokens": [ + 51664, + 16, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.1149740219116211, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18289069831371307, + "confidence": 0.945, + "words": [ + { + "text": "1", + "start": 53.5, + "end": 54.78, + "confidence": 0.945 + } + ] + }, + { + "id": 28, + "seek": 5600, + "start": 56.56, + "end": 57.24, + "text": "1", + "tokens": [ + 50364, + 16, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.424, + "words": [ + { + "text": "1", + "start": 56.56, + "end": 57.24, + "confidence": 0.424 + } + ] + }, + { + "id": 29, + "seek": 5600, + "start": 57.64, + "end": 58.86, + "text": "1", + "tokens": [ + 50464, + 16, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.789, + "words": [ + { + "text": "1", + "start": 57.64, + "end": 58.86, + "confidence": 0.789 + } + ] + }, + { + "id": 30, + "seek": 5600, + "start": 60.08, + "end": 60.92, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.954, + "words": [ + { + 
"text": "1", + "start": 60.08, + "end": 60.92, + "confidence": 0.954 + } + ] + }, + { + "id": 31, + "seek": 5600, + "start": 61.5, + "end": 62.62, + "text": "1", + "tokens": [ + 50664, + 16, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.949, + "words": [ + { + "text": "1", + "start": 61.5, + "end": 62.62, + "confidence": 0.949 + } + ] + }, + { + "id": 32, + "seek": 5600, + "start": 64.36, + "end": 65.06, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.954, + "words": [ + { + "text": "1", + "start": 64.36, + "end": 65.06, + "confidence": 0.954 + } + ] + }, + { + "id": 33, + "seek": 5600, + "start": 65.5, + "end": 66.92, + "text": "1", + "tokens": [ + 50864, + 16, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.952, + "words": [ + { + "text": "1", + "start": 65.5, + "end": 66.92, + "confidence": 0.952 + } + ] + }, + { + "id": 34, + "seek": 5600, + "start": 67.96, + "end": 69.06, + "text": "1", + "tokens": [ + 50964, + 16, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.965, + "words": [ + { + "text": "1", + "start": 67.96, + "end": 69.06, + "confidence": 0.965 + } + ] + }, + { + "id": 35, + "seek": 5600, + "start": 70.38, + "end": 71.69, + "text": "1", + "tokens": [ + 51064, + 16, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 70.38, + "end": 
71.69, + "confidence": 0.957 + } + ] + }, + { + "id": 36, + "seek": 5600, + "start": 71.69, + "end": 73.56, + "text": "1", + "tokens": [ + 51164, + 16, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 71.69, + "end": 73.56, + "confidence": 0.957 + } + ] + }, + { + "id": 37, + "seek": 5600, + "start": 73.56, + "end": 75.34, + "text": "1", + "tokens": [ + 51264, + 16, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.955, + "words": [ + { + "text": "1", + "start": 73.56, + "end": 75.34, + "confidence": 0.955 + } + ] + }, + { + "id": 38, + "seek": 5600, + "start": 75.5, + "end": 76.42, + "text": "1", + "tokens": [ + 51364, + 16, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.939, + "words": [ + { + "text": "1", + "start": 75.5, + "end": 76.42, + "confidence": 0.939 + } + ] + }, + { + "id": 39, + "seek": 5600, + "start": 78.14, + "end": 78.78, + "text": "1", + "tokens": [ + 51464, + 16, + 51564 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.936, + "words": [ + { + "text": "1", + "start": 78.14, + "end": 78.78, + "confidence": 0.936 + } + ] + }, + { + "id": 40, + "seek": 5600, + "start": 80.08, + "end": 80.86, + "text": "1", + "tokens": [ + 51564, + 16, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.921, + "words": [ + { + "text": "1", + "start": 80.08, + "end": 80.86, + "confidence": 0.921 + } + ] + }, 
+ { + "id": 41, + "seek": 5600, + "start": 81.5, + "end": 82.84, + "text": "1", + "tokens": [ + 51664, + 16, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.12077425826679576, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18519507348537445, + "confidence": 0.896, + "words": [ + { + "text": "1", + "start": 81.5, + "end": 82.84, + "confidence": 0.896 + } + ] + }, + { + "id": 42, + "seek": 8400, + "start": 84.82, + "end": 85.6, + "text": "1", + "tokens": [ + 50364, + 16, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.793, + "words": [ + { + "text": "1", + "start": 84.82, + "end": 85.6, + "confidence": 0.793 + } + ] + }, + { + "id": 43, + "seek": 8400, + "start": 85.6, + "end": 86.8, + "text": "1", + "tokens": [ + 50464, + 16, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.879, + "words": [ + { + "text": "1", + "start": 85.6, + "end": 86.8, + "confidence": 0.879 + } + ] + }, + { + "id": 44, + "seek": 8400, + "start": 87.76, + "end": 88.72, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.954, + "words": [ + { + "text": "1", + "start": 87.76, + "end": 88.72, + "confidence": 0.954 + } + ] + }, + { + "id": 45, + "seek": 8400, + "start": 89.5, + "end": 90.52, + "text": "1", + "tokens": [ + 50664, + 16, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.95, + "words": [ + { + "text": "1", + "start": 89.5, + "end": 90.52, + "confidence": 0.95 + } + ] + }, + { + "id": 46, + "seek": 8400, + "start": 91.5, + 
"end": 93.02, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.911, + "words": [ + { + "text": "1", + "start": 91.5, + "end": 93.02, + "confidence": 0.911 + } + ] + }, + { + "id": 47, + "seek": 8400, + "start": 93.68, + "end": 95.36, + "text": "1", + "tokens": [ + 50864, + 16, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.944, + "words": [ + { + "text": "1", + "start": 93.68, + "end": 95.36, + "confidence": 0.944 + } + ] + }, + { + "id": 48, + "seek": 8400, + "start": 95.86, + "end": 97.84, + "text": "1", + "tokens": [ + 50964, + 16, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.967, + "words": [ + { + "text": "1", + "start": 95.86, + "end": 97.84, + "confidence": 0.967 + } + ] + }, + { + "id": 49, + "seek": 8400, + "start": 97.84, + "end": 98.98, + "text": "1", + "tokens": [ + 51064, + 16, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.976, + "words": [ + { + "text": "1", + "start": 97.84, + "end": 98.98, + "confidence": 0.976 + } + ] + }, + { + "id": 50, + "seek": 8400, + "start": 99.5, + "end": 101.22, + "text": "1", + "tokens": [ + 51164, + 16, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.981, + "words": [ + { + "text": "1", + "start": 99.5, + "end": 101.22, + "confidence": 0.981 + } + ] + }, + { + "id": 51, + "seek": 8400, + "start": 101.5, + "end": 103.48, + "text": "1", + 
"tokens": [ + 51264, + 16, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.984, + "words": [ + { + "text": "1", + "start": 101.5, + "end": 103.48, + "confidence": 0.984 + } + ] + }, + { + "id": 52, + "seek": 8400, + "start": 103.5, + "end": 105.46, + "text": "1", + "tokens": [ + 51364, + 16, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.986, + "words": [ + { + "text": "1", + "start": 103.5, + "end": 105.46, + "confidence": 0.986 + } + ] + }, + { + "id": 53, + "seek": 8400, + "start": 105.58, + "end": 107.08, + "text": "1", + "tokens": [ + 51464, + 16, + 51564 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.956, + "words": [ + { + "text": "1", + "start": 105.58, + "end": 107.08, + "confidence": 0.956 + } + ] + }, + { + "id": 54, + "seek": 8400, + "start": 108.32, + "end": 108.98, + "text": "1", + "tokens": [ + 51564, + 16, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.945, + "words": [ + { + "text": "1", + "start": 108.32, + "end": 108.98, + "confidence": 0.945 + } + ] + }, + { + "id": 55, + "seek": 8400, + "start": 109.5, + "end": 110.4, + "text": "1", + "tokens": [ + 51664, + 16, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.10700221495194868, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18210835754871368, + "confidence": 0.888, + "words": [ + { + "text": "1", + "start": 109.5, + "end": 110.4, + "confidence": 0.888 + } + ] + }, + { + "id": 56, + "seek": 11200, + "start": 112.66, + "end": 113.32, + "text": "1", + "tokens": [ + 50364, + 
16, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.12910348176956177, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 0.2795506715774536, + "confidence": 0.917, + "words": [ + { + "text": "1", + "start": 112.66, + "end": 113.32, + "confidence": 0.917 + } + ] + }, + { + "id": 57, + "seek": 11200, + "start": 113.98, + "end": 114.82, + "text": "1", + "tokens": [ + 50464, + 16, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.12910348176956177, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 0.2795506715774536, + "confidence": 0.888, + "words": [ + { + "text": "1", + "start": 113.98, + "end": 114.82, + "confidence": 0.888 + } + ] + }, + { + "id": 58, + "seek": 11200, + "start": 116.1, + "end": 117.3, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.12910348176956177, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 0.2795506715774536, + "confidence": 0.979, + "words": [ + { + "text": "1", + "start": 116.1, + "end": 117.3, + "confidence": 0.979 + } + ] + }, + { + "id": 59, + "seek": 11200, + "start": 117.5, + "end": 118.68, + "text": "1", + "tokens": [ + 50664, + 16, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.12910348176956177, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 0.2795506715774536, + "confidence": 0.981, + "words": [ + { + "text": "1", + "start": 117.5, + "end": 118.68, + "confidence": 0.981 + } + ] + }, + { + "id": 60, + "seek": 11200, + "start": 119.5, + "end": 120.86, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.12910348176956177, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 0.2795506715774536, + "confidence": 0.973, + "words": [ + { + "text": "1", + "start": 119.5, + "end": 120.86, + "confidence": 0.973 + } + ] + } + ], + "language": "zh", + "language_probs": { + "en": 0.36062490940093994, + "zh": 0.4023061692714691, + "de": 0.01767610013484955, 
+ "es": 0.013342619873583317, + "ru": 0.02454083226621151, + "ko": 0.032007280737161636, + "fr": 0.010391242802143097, + "ja": 0.02492729388177395, + "pt": 0.01442681159824133, + "tr": 0.011960247531533241, + "pl": 0.006709090434014797, + "ca": 0.00019636373326648027, + "nl": 0.0022826530039310455, + "ar": 0.006302607245743275, + "sv": 0.0012804523576050997, + "it": 0.005649620667099953, + "id": 0.0013419024180620909, + "hi": 0.0014509425964206457, + "fi": 0.0006048428476788104, + "vi": 0.004611086566001177, + "he": 0.00036685573286376894, + "uk": 0.0006539910682477057, + "el": 0.0013006164226680994, + "ms": 0.0030240239575505257, + "cs": 0.00037263287231326103, + "ro": 0.000981756835244596, + "da": 0.00044251244980841875, + "hu": 0.0023185997270047665, + "ta": 0.00022601327509619296, + "no": 0.00029020674992352724, + "th": 0.0027534051332622766, + "ur": 0.0006240426446311176, + "hr": 6.0830639995401725e-05, + "bg": 4.591737888404168e-05, + "lt": 1.0737247066572309e-05, + "la": 0.0025464834179729223, + "mi": 0.00035005618701688945, + "ml": 0.00017601929721422493, + "cy": 0.002977140713483095, + "sk": 1.9748818886000663e-05, + "te": 0.00020259697339497507, + "fa": 0.0007295797113329172, + "lv": 2.3220923139888328e-06, + "bn": 0.00011543627624632791, + "sr": 2.5503195502096787e-06, + "az": 2.147584382328205e-06, + "sl": 3.2055493647931144e-05, + "kn": 6.972185815357079e-07, + "et": 2.0492393559834454e-06, + "mk": 3.242367085931619e-07, + "br": 0.0006747509469278157, + "eu": 2.496484376024455e-05, + "is": 1.8844455553335138e-05, + "hy": 6.512469099106966e-06, + "ne": 2.3821621653041802e-05, + "mn": 1.9442642951617017e-05, + "bs": 2.0375713575049303e-05, + "kk": 2.0174688870611135e-06, + "sq": 1.8658535054782988e-06, + "sw": 1.1252537660766393e-05, + "gl": 0.00017601929721422493, + "mr": 1.6725402929296251e-06, + "pa": 4.993283255316783e-06, + "si": 0.00014366276445798576, + "km": 0.00042224835488013923, + "sn": 0.0001844666403485462, + "yo": 3.9275215385714546e-05, + 
"so": 1.708607584305355e-07, + "af": 1.8369262306805467e-06, + "oc": 1.4676075807074085e-05, + "ka": 3.192099029547535e-07, + "be": 3.8066846173023805e-05, + "tg": 6.188167134268951e-08, + "sd": 5.837739081471227e-06, + "gu": 2.447467295496608e-07, + "am": 1.982019313118144e-07, + "yi": 1.024555149342632e-05, + "lo": 1.4676075807074085e-05, + "uz": 1.0753421264553253e-08, + "fo": 1.2166893611720297e-05, + "ht": 8.493861059832852e-06, + "ps": 1.197826350107789e-05, + "tk": 7.406265467579942e-08, + "nn": 0.02132144570350647, + "mt": 1.9512908977503685e-07, + "sa": 0.00014366276445798576, + "lb": 2.241205443453964e-08, + "my": 0.00010510591528145596, + "bo": 2.135356407961808e-05, + "tl": 0.0008139049750752747, + "mg": 1.069219308647007e-07, + "as": 1.2236566817591665e-06, + "tt": 1.0586703957926602e-08, + "haw": 0.005064287222921848, + "ln": 2.688016991214681e-07, + "ha": 5.334533170753275e-08, + "ba": 7.29144176148111e-08, + "jw": 0.007368494290858507, + "su": 1.4054621999548544e-07 + } +} \ No newline at end of file diff --git a/tests/expected/medium_auto/smartphone.mp3.words.json b/tests/expected/medium_auto/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..bdd08b7a2a019a4c4894dc96c6c9c711679ab576 --- /dev/null +++ b/tests/expected/medium_auto/smartphone.mp3.words.json @@ -0,0 +1,5000 @@ +{ + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions, mais la manière dont elles interagissent entre elles. Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces. L'écran tactile a été beaucoup très souvent mentionné. Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes. Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible. 
Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but. Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité. Mais ça, ça soulève une autre interrogation. Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit ? Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone ? Il n'y a pas d'équivalent en fait. Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant. Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendant de cet objet, d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet. Donc à objet inédit, rapport inédit. Et ce rapport, si j'en crois à Nicolas, serait caractérisé par un mélange de dépendance et de rejet. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment. Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre. On peut adorer sa bagnole, en avoir besoin pour plein de choses. Le soir, quand on va se coucher, on la laisse. On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes. On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. 
Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui, continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate. Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi. Donc, rapport inédit, d'accord. Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais ? Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux ? Les économistes parlent de dépendance du sentier. C'est l'idée qu'on met sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.4, + "end": 3.66, + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça.", + "tokens": [ + 50364, + 383, + 6, + 377, + 20090, + 1078, + 1769, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 408, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13, + 50539 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.935, + "words": [ + { + "text": "C'est", + "start": 0.4, + "end": 0.64, + "confidence": 0.961 + }, + { + "text": "évident", + "start": 0.64, + "end": 0.9, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 0.9, + "end": 1.0, + "confidence": 0.663 + }, + { + "text": "que", + "start": 1.0, + "end": 1.08, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.994 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.48, + "confidence": 0.91 + }, + { + "text": "mais", + "start": 1.7, + "end": 2.04, + "confidence": 0.979 + }, + { + "text": "je", + "start": 2.04, + "end": 2.26, + "confidence": 0.981 + }, + { + "text": "ne", + "start": 2.26, 
+ "end": 2.34, + "confidence": 0.835 + }, + { + "text": "me", + "start": 2.34, + "end": 2.36, + "confidence": 0.821 + }, + { + "text": "l'étais", + "start": 2.36, + "end": 2.58, + "confidence": 0.971 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.88, + "confidence": 0.989 + }, + { + "text": "formulé", + "start": 2.88, + "end": 3.26, + "confidence": 0.911 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.44, + "confidence": 0.993 + }, + { + "text": "ça.", + "start": 3.44, + "end": 3.66, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.16, + "end": 7.94, + "text": " Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions,", + "tokens": [ + 50549, + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 287, + 6, + 8476, + 449, + 2776, + 730, + 17290, + 3916, + 11, + 50756 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.931, + "words": [ + { + "text": "Ce", + "start": 4.16, + "end": 4.28, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 4.28, + "end": 4.36, + "confidence": 0.958 + }, + { + "text": "fait", + "start": 4.36, + "end": 4.5, + "confidence": 0.568 + }, + { + "text": "la", + "start": 4.5, + "end": 4.76, + "confidence": 0.971 + }, + { + "text": "force", + "start": 4.76, + "end": 5.02, + "confidence": 0.999 + }, + { + "text": "du", + "start": 5.02, + "end": 5.22, + "confidence": 0.996 + }, + { + "text": "smartphone,", + "start": 5.22, + "end": 5.7, + "confidence": 0.911 + }, + { + "text": "c'est", + "start": 6.04, + "end": 6.16, + "confidence": 0.88 + }, + { + "text": "pas", + "start": 6.16, + "end": 6.26, + "confidence": 0.992 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.54, + "confidence": 0.999 + }, + { + "text": "l'accumulation", + "start": 6.54, + "end": 7.4, + "confidence": 0.957 + }, + { + "text": 
"des", + "start": 7.4, + "end": 7.58, + "confidence": 0.983 + }, + { + "text": "fonctions,", + "start": 7.58, + "end": 7.94, + "confidence": 0.987 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.32, + "end": 10.88, + "text": " mais la manière dont elles interagissent entre elles.", + "tokens": [ + 50756, + 2420, + 635, + 22267, + 9400, + 23576, + 728, + 559, + 25450, + 3962, + 23576, + 13, + 50906 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.977, + "words": [ + { + "text": "mais", + "start": 8.32, + "end": 8.48, + "confidence": 0.992 + }, + { + "text": "la", + "start": 8.48, + "end": 8.7, + "confidence": 0.995 + }, + { + "text": "manière", + "start": 8.7, + "end": 8.94, + "confidence": 0.999 + }, + { + "text": "dont", + "start": 8.94, + "end": 9.08, + "confidence": 0.978 + }, + { + "text": "elles", + "start": 9.08, + "end": 9.48, + "confidence": 0.967 + }, + { + "text": "interagissent", + "start": 9.48, + "end": 10.38, + "confidence": 0.965 + }, + { + "text": "entre", + "start": 10.38, + "end": 10.7, + "confidence": 0.955 + }, + { + "text": "elles.", + "start": 10.7, + "end": 10.88, + "confidence": 0.989 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.96, + "end": 13.0, + "text": " Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant.", + "tokens": [ + 50906, + 8257, + 1956, + 6176, + 274, + 6, + 19400, + 1022, + 635, + 5052, + 11, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13, + 51006 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.906, + "words": [ + { + "text": "Ce", + "start": 10.96, + "end": 11.16, + "confidence": 0.608 + }, + { + "text": "qui", + "start": 11.16, + "end": 11.26, + "confidence": 0.764 + }, + { + "text": "dit", + "start": 11.26, + "end": 11.38, + "confidence": 0.983 + }, + { 
+ "text": "d'ailleurs", + "start": 11.38, + "end": 11.58, + "confidence": 0.985 + }, + { + "text": "sur", + "start": 11.58, + "end": 11.72, + "confidence": 0.48 + }, + { + "text": "la", + "start": 11.72, + "end": 11.82, + "confidence": 0.984 + }, + { + "text": "photo,", + "start": 11.82, + "end": 12.0, + "confidence": 0.994 + }, + { + "text": "c'est", + "start": 12.14, + "end": 12.2, + "confidence": 0.997 + }, + { + "text": "hyper", + "start": 12.2, + "end": 12.48, + "confidence": 0.993 + }, + { + "text": "convaincant.", + "start": 12.48, + "end": 13.0, + "confidence": 0.982 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.38, + "end": 16.04, + "text": " Alors évidemment, il faudrait ajouter les interfaces.", + "tokens": [ + 51006, + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 13, + 51166 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.913, + "words": [ + { + "text": "Alors", + "start": 13.38, + "end": 13.58, + "confidence": 0.585 + }, + { + "text": "évidemment,", + "start": 13.58, + "end": 13.86, + "confidence": 0.832 + }, + { + "text": "il", + "start": 14.26, + "end": 14.42, + "confidence": 0.953 + }, + { + "text": "faudrait", + "start": 14.42, + "end": 14.76, + "confidence": 0.996 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.2, + "confidence": 0.992 + }, + { + "text": "les", + "start": 15.2, + "end": 15.6, + "confidence": 0.985 + }, + { + "text": "interfaces.", + "start": 15.6, + "end": 16.04, + "confidence": 0.984 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.22, + "end": 19.36, + "text": " L'écran tactile a été beaucoup très souvent mentionné.", + "tokens": [ + 51166, + 441, + 6, + 9062, + 4257, + 47319, + 257, + 8862, + 8796, + 5732, + 20847, + 2152, + 15055, + 13, + 51331 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 
1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.924, + "words": [ + { + "text": "L'écran", + "start": 16.22, + "end": 16.78, + "confidence": 0.996 + }, + { + "text": "tactile", + "start": 16.78, + "end": 17.1, + "confidence": 0.986 + }, + { + "text": "a", + "start": 17.1, + "end": 17.3, + "confidence": 0.98 + }, + { + "text": "été", + "start": 17.3, + "end": 17.84, + "confidence": 0.975 + }, + { + "text": "beaucoup", + "start": 17.84, + "end": 18.28, + "confidence": 0.976 + }, + { + "text": "très", + "start": 18.28, + "end": 18.62, + "confidence": 0.448 + }, + { + "text": "souvent", + "start": 18.62, + "end": 18.9, + "confidence": 0.996 + }, + { + "text": "mentionné.", + "start": 18.9, + "end": 19.36, + "confidence": 0.978 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 20.02, + "end": 25.44, + "text": " Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes.", + "tokens": [ + 51331, + 6313, + 4428, + 11, + 1930, + 8487, + 1264, + 421, + 6, + 388, + 1740, + 642, + 6212, + 368, + 945, + 1567, + 17338, + 1512, + 358, + 1625, + 1512, + 4792, + 13923, + 2156, + 4666, + 6592, + 724, + 5714, + 1531, + 596, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13, + 51631 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.921, + "words": [ + { + "text": "Mais", + "start": 20.02, + "end": 20.26, + "confidence": 0.943 + }, + { + "text": "bon,", + "start": 20.26, + "end": 20.5, + "confidence": 0.666 + }, + { + "text": "il", + "start": 20.52, + "end": 20.62, + "confidence": 0.99 + }, + { + "text": "faut", + "start": 20.62, + "end": 20.7, + "confidence": 0.99 + }, + { + "text": "dire", + "start": 20.7, + "end": 20.82, + "confidence": 0.995 + }, + { + "text": "qu'il", + "start": 20.82, + "end": 21.04, + "confidence": 0.88 + }, + { + "text": "profite", + "start": 
21.04, + "end": 21.3, + "confidence": 0.995 + }, + { + "text": "aussi", + "start": 21.3, + "end": 21.7, + "confidence": 0.972 + }, + { + "text": "de", + "start": 21.7, + "end": 21.94, + "confidence": 0.97 + }, + { + "text": "20", + "start": 21.94, + "end": 22.16, + "confidence": 0.812 + }, + { + "text": "ans", + "start": 22.16, + "end": 22.3, + "confidence": 0.997 + }, + { + "text": "pendant", + "start": 22.3, + "end": 22.54, + "confidence": 0.723 + }, + { + "text": "lesquels", + "start": 22.54, + "end": 22.92, + "confidence": 0.98 + }, + { + "text": "les", + "start": 22.92, + "end": 23.14, + "confidence": 0.709 + }, + { + "text": "ordinateurs", + "start": 23.14, + "end": 23.58, + "confidence": 0.966 + }, + { + "text": "nous", + "start": 23.58, + "end": 23.74, + "confidence": 0.602 + }, + { + "text": "ont", + "start": 23.74, + "end": 23.88, + "confidence": 0.974 + }, + { + "text": "appris", + "start": 23.88, + "end": 24.12, + "confidence": 0.991 + }, + { + "text": "à", + "start": 24.12, + "end": 24.26, + "confidence": 0.829 + }, + { + "text": "cliquer", + "start": 24.26, + "end": 24.54, + "confidence": 0.989 + }, + { + "text": "sur", + "start": 24.54, + "end": 24.72, + "confidence": 0.984 + }, + { + "text": "des", + "start": 24.72, + "end": 24.92, + "confidence": 0.971 + }, + { + "text": "icônes.", + "start": 24.92, + "end": 25.44, + "confidence": 0.992 + } + ] + }, + { + "id": 7, + "seek": 2534, + "start": 25.54, + "end": 30.64, + "text": " Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible.", + "tokens": [ + 50364, + 318, + 9507, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1769, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.1078022932394957, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.19658996164798737, + "confidence": 0.968, + "words": [ + { + "text": "Sauf", + "start": 25.54, + "end": 
25.82, + "confidence": 0.974 + }, + { + "text": "que", + "start": 25.82, + "end": 26.3, + "confidence": 0.996 + }, + { + "text": "le", + "start": 26.3, + "end": 26.66, + "confidence": 0.672 + }, + { + "text": "smartphone", + "start": 26.66, + "end": 27.08, + "confidence": 0.995 + }, + { + "text": "ajoute", + "start": 27.08, + "end": 27.48, + "confidence": 0.99 + }, + { + "text": "le", + "start": 27.48, + "end": 27.66, + "confidence": 0.989 + }, + { + "text": "toucher,", + "start": 27.66, + "end": 28.06, + "confidence": 0.986 + }, + { + "text": "ce", + "start": 28.14, + "end": 28.16, + "confidence": 0.951 + }, + { + "text": "qui", + "start": 28.16, + "end": 28.3, + "confidence": 1.0 + }, + { + "text": "rend", + "start": 28.3, + "end": 28.54, + "confidence": 0.994 + }, + { + "text": "le", + "start": 28.54, + "end": 28.8, + "confidence": 0.992 + }, + { + "text": "contact", + "start": 28.8, + "end": 29.16, + "confidence": 0.999 + }, + { + "text": "plus", + "start": 29.16, + "end": 29.58, + "confidence": 0.983 + }, + { + "text": "direct,", + "start": 29.58, + "end": 30.0, + "confidence": 0.991 + }, + { + "text": "plus", + "start": 30.1, + "end": 30.24, + "confidence": 0.99 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.64, + "confidence": 0.997 + } + ] + }, + { + "id": 8, + "seek": 2534, + "start": 31.04, + "end": 37.82, + "text": " Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but.", + "tokens": [ + 50639, + 3790, + 9093, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 10095, + 602, + 84, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 1609, + 457, + 13, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.1078022932394957, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.19658996164798737, + "confidence": 0.893, + "words": [ + { + 
"text": "Et", + "start": 31.04, + "end": 31.24, + "confidence": 0.963 + }, + { + "text": "puis", + "start": 31.24, + "end": 31.34, + "confidence": 0.967 + }, + { + "text": "évidemment,", + "start": 31.34, + "end": 31.62, + "confidence": 0.868 + }, + { + "text": "il", + "start": 31.66, + "end": 31.72, + "confidence": 0.99 + }, + { + "text": "faudrait", + "start": 31.72, + "end": 31.94, + "confidence": 0.996 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.16, + "confidence": 0.866 + }, + { + "text": "aussi", + "start": 32.16, + "end": 32.34, + "confidence": 0.971 + }, + { + "text": "des", + "start": 32.34, + "end": 32.48, + "confidence": 0.993 + }, + { + "text": "applications", + "start": 32.48, + "end": 32.92, + "confidence": 0.993 + }, + { + "text": "qui", + "start": 32.92, + "end": 33.22, + "confidence": 0.562 + }, + { + "text": "permettent", + "start": 33.22, + "end": 33.76, + "confidence": 0.99 + }, + { + "text": "de", + "start": 33.76, + "end": 33.98, + "confidence": 0.872 + }, + { + "text": "contourner", + "start": 33.98, + "end": 34.42, + "confidence": 0.953 + }, + { + "text": "le", + "start": 34.42, + "end": 34.54, + "confidence": 0.75 + }, + { + "text": "côté", + "start": 34.54, + "end": 34.78, + "confidence": 0.985 + }, + { + "text": "touffu", + "start": 34.78, + "end": 35.32, + "confidence": 0.727 + }, + { + "text": "de", + "start": 35.32, + "end": 35.7, + "confidence": 0.881 + }, + { + "text": "la", + "start": 35.7, + "end": 35.82, + "confidence": 0.991 + }, + { + "text": "navigation", + "start": 35.82, + "end": 36.3, + "confidence": 0.992 + }, + { + "text": "web", + "start": 36.3, + "end": 36.58, + "confidence": 0.847 + }, + { + "text": "pour", + "start": 36.58, + "end": 36.76, + "confidence": 0.589 + }, + { + "text": "aller", + "start": 36.76, + "end": 37.16, + "confidence": 0.981 + }, + { + "text": "directement", + "start": 37.16, + "end": 37.54, + "confidence": 0.997 + }, + { + "text": "au", + "start": 37.54, + "end": 37.7, + "confidence": 
0.969 + }, + { + "text": "but.", + "start": 37.7, + "end": 37.82, + "confidence": 0.995 + } + ] + }, + { + "id": 9, + "seek": 2534, + "start": 37.9, + "end": 46.6, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 50989, + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 8603, + 14964, + 9400, + 38268, + 6176, + 421, + 6, + 388, + 871, + 6070, + 271, + 443, + 5199, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13, + 51439 + ], + "temperature": 0.0, + "avg_logprob": -0.1078022932394957, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.19658996164798737, + "confidence": 0.973, + "words": [ + { + "text": "Bref,", + "start": 37.9, + "end": 38.24, + "confidence": 0.984 + }, + { + "text": "tout", + "start": 38.88, + "end": 39.02, + "confidence": 0.7 + }, + { + "text": "ça,", + "start": 39.02, + "end": 39.4, + "confidence": 0.995 + }, + { + "text": "ce", + "start": 39.4, + "end": 39.72, + "confidence": 0.989 + }, + { + "text": "sont", + "start": 39.72, + "end": 39.92, + "confidence": 0.999 + }, + { + "text": "les", + "start": 39.92, + "end": 40.22, + "confidence": 0.992 + }, + { + "text": "conditions", + "start": 40.22, + "end": 40.7, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 40.7, + "end": 41.02, + "confidence": 0.996 + }, + { + "text": "permettent", + "start": 41.02, + "end": 41.44, + "confidence": 0.996 + }, + { + "text": "de", + "start": 41.44, + "end": 41.74, + "confidence": 0.997 + }, + { + "text": "créer", + "start": 41.74, + "end": 42.1, + "confidence": 0.998 + }, + { + "text": "cet", + "start": 42.1, + "end": 42.38, + "confidence": 0.997 + }, + { + "text": "objet", + "start": 42.38, + "end": 42.64, + "confidence": 0.994 + }, + { + "text": "dont", + "start": 42.64, + "end": 42.84, + 
"confidence": 0.79 + }, + { + "text": "Nicolas", + "start": 42.84, + "end": 43.24, + "confidence": 0.988 + }, + { + "text": "dit", + "start": 43.24, + "end": 43.52, + "confidence": 0.986 + }, + { + "text": "qu'il", + "start": 43.52, + "end": 43.74, + "confidence": 0.982 + }, + { + "text": "est", + "start": 43.74, + "end": 43.94, + "confidence": 0.99 + }, + { + "text": "vraisemblablement", + "start": 43.94, + "end": 44.86, + "confidence": 0.99 + }, + { + "text": "inédit", + "start": 44.86, + "end": 45.44, + "confidence": 0.979 + }, + { + "text": "dans", + "start": 45.44, + "end": 45.72, + "confidence": 0.967 + }, + { + "text": "l'histoire", + "start": 45.72, + "end": 46.02, + "confidence": 0.957 + }, + { + "text": "de", + "start": 46.02, + "end": 46.14, + "confidence": 0.999 + }, + { + "text": "l'humanité.", + "start": 46.14, + "end": 46.6, + "confidence": 0.992 + } + ] + }, + { + "id": 10, + "seek": 2534, + "start": 47.02, + "end": 48.78, + "text": " Mais ça, ça soulève une autre interrogation.", + "tokens": [ + 51439, + 6313, + 2788, + 11, + 2788, + 5133, + 31397, + 2251, + 15081, + 24871, + 399, + 13, + 51539 + ], + "temperature": 0.0, + "avg_logprob": -0.1078022932394957, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.19658996164798737, + "confidence": 0.969, + "words": [ + { + "text": "Mais", + "start": 47.02, + "end": 47.28, + "confidence": 0.841 + }, + { + "text": "ça,", + "start": 47.28, + "end": 47.48, + "confidence": 0.942 + }, + { + "text": "ça", + "start": 47.62, + "end": 47.64, + "confidence": 0.976 + }, + { + "text": "soulève", + "start": 47.64, + "end": 47.86, + "confidence": 0.993 + }, + { + "text": "une", + "start": 47.86, + "end": 48.02, + "confidence": 0.998 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.2, + "confidence": 0.999 + }, + { + "text": "interrogation.", + "start": 48.2, + "end": 48.78, + "confidence": 0.997 + } + ] + }, + { + "id": 11, + "seek": 4884, + "start": 49.36, + "end": 55.5, + "text": " Est-ce que 
le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit ?", + "tokens": [ + 50389, + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 8603, + 14964, + 12703, + 294, + 7811, + 270, + 13716, + 270, + 631, + 10349, + 18018, + 1531, + 8783, + 871, + 6212, + 517, + 18018, + 294, + 7811, + 270, + 2506, + 50689 + ], + "temperature": 0.0, + "avg_logprob": -0.08238351049502034, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.986, + "words": [ + { + "text": "Est-ce", + "start": 49.36, + "end": 49.64, + "confidence": 0.978 + }, + { + "text": "que", + "start": 49.64, + "end": 49.76, + "confidence": 0.991 + }, + { + "text": "le", + "start": 49.76, + "end": 49.82, + "confidence": 0.993 + }, + { + "text": "fait", + "start": 49.82, + "end": 50.0, + "confidence": 0.999 + }, + { + "text": "que", + "start": 50.0, + "end": 50.14, + "confidence": 0.991 + }, + { + "text": "cet", + "start": 50.14, + "end": 50.32, + "confidence": 0.989 + }, + { + "text": "objet", + "start": 50.32, + "end": 50.66, + "confidence": 0.997 + }, + { + "text": "soit", + "start": 50.66, + "end": 51.12, + "confidence": 0.995 + }, + { + "text": "inédit", + "start": 51.12, + "end": 51.78, + "confidence": 0.995 + }, + { + "text": "induit", + "start": 51.78, + "end": 52.32, + "confidence": 0.956 + }, + { + "text": "que", + "start": 52.32, + "end": 52.42, + "confidence": 0.984 + }, + { + "text": "notre", + "start": 52.42, + "end": 52.78, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 52.78, + "end": 53.28, + "confidence": 0.997 + }, + { + "text": "à", + "start": 53.28, + "end": 53.46, + "confidence": 0.979 + }, + { + "text": "lui", + "start": 53.46, + "end": 53.68, + "confidence": 0.999 + }, + { + "text": "est", + "start": 53.68, + "end": 54.14, + "confidence": 0.907 + }, + { + "text": "aussi", + "start": 54.14, + "end": 54.52, + "confidence": 0.996 + }, + { + "text": "un", + "start": 54.52, + "end": 54.72, + 
"confidence": 0.994 + }, + { + "text": "rapport", + "start": 54.72, + "end": 55.0, + "confidence": 0.997 + }, + { + "text": "inédit ?", + "start": 55.0, + "end": 55.5, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 4884, + "start": 55.76, + "end": 63.4, + "text": " Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone ?", + "tokens": [ + 50689, + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 13307, + 871, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 1111, + 25349, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506, + 51089 + ], + "temperature": 0.0, + "avg_logprob": -0.08238351049502034, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.96, + "words": [ + { + "text": "Je", + "start": 55.76, + "end": 55.9, + "confidence": 0.904 + }, + { + "text": "veux", + "start": 55.9, + "end": 56.0, + "confidence": 0.989 + }, + { + "text": "dire,", + "start": 56.0, + "end": 56.12, + "confidence": 0.997 + }, + { + "text": "est-ce", + "start": 56.22, + "end": 56.38, + "confidence": 0.99 + }, + { + "text": "que", + "start": 56.38, + "end": 56.5, + "confidence": 0.991 + }, + { + "text": "le", + "start": 56.5, + "end": 56.66, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 56.66, + "end": 56.86, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 56.86, + "end": 57.08, + "confidence": 0.985 + }, + { + "text": "a", + "start": 57.08, + "end": 57.16, + "confidence": 0.99 + }, + { + "text": "au", + "start": 57.16, + "end": 57.28, + "confidence": 0.966 + }, + { + "text": "smartphone", + "start": 57.28, + "end": 57.6, + "confidence": 0.995 + }, + { + "text": "est", + "start": 57.6, + "end": 57.86, + "confidence": 0.911 + }, + { + "text": "comparable", + "start": 57.86, + 
"end": 58.3, + "confidence": 0.996 + }, + { + "text": "à", + "start": 58.3, + "end": 58.5, + "confidence": 0.946 + }, + { + "text": "celui", + "start": 58.5, + "end": 58.66, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 58.66, + "end": 58.96, + "confidence": 0.989 + }, + { + "text": "entretenait", + "start": 58.96, + "end": 59.36, + "confidence": 0.926 + }, + { + "text": "à", + "start": 59.36, + "end": 59.48, + "confidence": 0.951 + }, + { + "text": "d'autres", + "start": 59.48, + "end": 59.7, + "confidence": 0.997 + }, + { + "text": "objets", + "start": 59.7, + "end": 59.98, + "confidence": 0.99 + }, + { + "text": "techniques", + "start": 59.98, + "end": 60.44, + "confidence": 0.984 + }, + { + "text": "comme", + "start": 60.44, + "end": 60.96, + "confidence": 0.496 + }, + { + "text": "la", + "start": 60.96, + "end": 61.52, + "confidence": 0.987 + }, + { + "text": "voiture", + "start": 61.52, + "end": 62.08, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 62.08, + "end": 62.52, + "confidence": 0.949 + }, + { + "text": "le", + "start": 62.52, + "end": 62.74, + "confidence": 0.998 + }, + { + "text": "téléphone ?", + "start": 62.74, + "end": 63.4, + "confidence": 0.999 + } + ] + }, + { + "id": 13, + "seek": 4884, + "start": 65.36, + "end": 66.62, + "text": " Il n'y a pas d'équivalent en fait.", + "tokens": [ + 51189, + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 465, + 3887, + 13, + 51239 + ], + "temperature": 0.0, + "avg_logprob": -0.08238351049502034, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.94, + "words": [ + { + "text": "Il", + "start": 65.36, + "end": 65.5, + "confidence": 0.85 + }, + { + "text": "n'y", + "start": 65.5, + "end": 65.54, + "confidence": 0.978 + }, + { + "text": "a", + "start": 65.54, + "end": 65.56, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.66, + "confidence": 0.999 + }, + { + "text": 
"d'équivalent", + "start": 65.66, + "end": 66.3, + "confidence": 0.995 + }, + { + "text": "en", + "start": 66.3, + "end": 66.44, + "confidence": 0.582 + }, + { + "text": "fait.", + "start": 66.44, + "end": 66.62, + "confidence": 0.997 + } + ] + }, + { + "id": 14, + "seek": 4884, + "start": 66.88, + "end": 71.24, + "text": " Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant.", + "tokens": [ + 51239, + 3790, + 5926, + 5550, + 7089, + 30236, + 368, + 11456, + 1375, + 526, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 11, + 269, + 6, + 377, + 7184, + 259, + 394, + 1030, + 7245, + 351, + 5798, + 13, + 51489 + ], + "temperature": 0.0, + "avg_logprob": -0.08238351049502034, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.957, + "words": [ + { + "text": "Et", + "start": 66.88, + "end": 67.02, + "confidence": 0.655 + }, + { + "text": "donc", + "start": 67.02, + "end": 67.12, + "confidence": 0.902 + }, + { + "text": "cette", + "start": 67.12, + "end": 67.3, + "confidence": 0.719 + }, + { + "text": "espèce", + "start": 67.3, + "end": 67.54, + "confidence": 0.996 + }, + { + "text": "de", + "start": 67.54, + "end": 67.76, + "confidence": 0.999 + }, + { + "text": "nouveauté", + "start": 67.76, + "end": 68.48, + "confidence": 0.981 + }, + { + "text": "dans", + "start": 68.48, + "end": 68.84, + "confidence": 0.982 + }, + { + "text": "la", + "start": 68.84, + "end": 68.96, + "confidence": 0.995 + }, + { + "text": "relation", + "start": 68.96, + "end": 69.24, + "confidence": 0.998 + }, + { + "text": "à", + "start": 69.24, + "end": 69.38, + "confidence": 0.997 + }, + { + "text": "l'objet,", + "start": 69.38, + "end": 70.0, + "confidence": 0.998 + }, + { + "text": "c'est", + "start": 70.28, + "end": 70.38, + "confidence": 0.977 + }, + { + "text": "fascinant", + "start": 70.38, + "end": 70.68, + "confidence": 0.98 + }, + { + "text": "et", + "start": 70.68, + "end": 70.76, + 
"confidence": 0.972 + }, + { + "text": "terrifiant.", + "start": 70.76, + "end": 71.24, + "confidence": 0.978 + } + ] + }, + { + "id": 15, + "seek": 7134, + "start": 71.62, + "end": 76.42, + "text": " Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendant de cet objet,", + "tokens": [ + 50389, + 20429, + 421, + 6, + 266, + 257, + 287, + 6, + 36107, + 11, + 5173, + 476, + 37313, + 1512, + 33643, + 25929, + 1030, + 1512, + 3328, + 11, + 274, + 6, + 9498, + 45768, + 394, + 368, + 8603, + 14964, + 11, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.11261525396573341, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181636437773705, + "confidence": 0.86, + "words": [ + { + "text": "Parce", + "start": 71.62, + "end": 71.9, + "confidence": 0.844 + }, + { + "text": "qu'on", + "start": 71.9, + "end": 72.4, + "confidence": 0.968 + }, + { + "text": "a", + "start": 72.4, + "end": 72.54, + "confidence": 0.979 + }, + { + "text": "l'impression,", + "start": 72.54, + "end": 72.9, + "confidence": 0.997 + }, + { + "text": "comme", + "start": 73.66, + "end": 73.9, + "confidence": 0.982 + }, + { + "text": "le", + "start": 73.9, + "end": 74.02, + "confidence": 0.984 + }, + { + "text": "disent", + "start": 74.02, + "end": 74.22, + "confidence": 0.997 + }, + { + "text": "les", + "start": 74.22, + "end": 74.52, + "confidence": 0.996 + }, + { + "text": "utilisateurs", + "start": 74.52, + "end": 74.84, + "confidence": 0.998 + }, + { + "text": "et", + "start": 74.84, + "end": 74.94, + "confidence": 0.843 + }, + { + "text": "les", + "start": 74.94, + "end": 75.02, + "confidence": 0.779 + }, + { + "text": "services,", + "start": 75.02, + "end": 75.18, + "confidence": 0.125 + }, + { + "text": "d'être", + "start": 75.22, + "end": 75.5, + "confidence": 0.934 + }, + { + "text": "dépendant", + "start": 75.5, + "end": 75.98, + "confidence": 0.709 + }, + { + "text": "de", + "start": 75.98, + "end": 76.08, + "confidence": 0.977 + }, + { 
+ "text": "cet", + "start": 76.08, + "end": 76.26, + "confidence": 0.998 + }, + { + "text": "objet,", + "start": 76.26, + "end": 76.42, + "confidence": 0.997 + } + ] + }, + { + "id": 16, + "seek": 7134, + "start": 76.42, + "end": 83.3, + "text": " d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet.", + "tokens": [ + 50639, + 274, + 6, + 471, + 43612, + 465, + 3887, + 2251, + 7089, + 30236, + 368, + 9721, + 11, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 368, + 287, + 6, + 335, + 781, + 374, + 1030, + 1956, + 669, + 18832, + 6212, + 1531, + 730, + 1254, + 279, + 368, + 319, + 7108, + 13, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.11261525396573341, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181636437773705, + "confidence": 0.923, + "words": [ + { + "text": "d'induire", + "start": 76.42, + "end": 77.1, + "confidence": 0.964 + }, + { + "text": "en", + "start": 77.1, + "end": 77.18, + "confidence": 0.721 + }, + { + "text": "fait", + "start": 77.18, + "end": 77.34, + "confidence": 0.997 + }, + { + "text": "une", + "start": 77.34, + "end": 77.5, + "confidence": 0.979 + }, + { + "text": "espèce", + "start": 77.5, + "end": 77.88, + "confidence": 0.997 + }, + { + "text": "de", + "start": 77.88, + "end": 78.28, + "confidence": 0.997 + }, + { + "text": "relation,", + "start": 78.28, + "end": 78.54, + "confidence": 0.42 + }, + { + "text": "de", + "start": 78.66, + "end": 78.96, + "confidence": 0.992 + }, + { + "text": "médiation", + "start": 78.96, + "end": 79.52, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 79.52, + "end": 79.76, + "confidence": 0.954 + }, + { + "text": "le", + "start": 79.76, + "end": 80.02, + "confidence": 0.998 + }, + { + "text": "monde", + "start": 80.02, + "end": 80.3, + "confidence": 0.996 + }, + { + "text": "qui", + "start": 80.3, + "end": 81.24, + "confidence": 0.664 + }, + { + "text": "rend", + 
"start": 81.24, + "end": 81.66, + "confidence": 0.968 + }, + { + "text": "de", + "start": 81.66, + "end": 81.74, + "confidence": 0.688 + }, + { + "text": "l'ampleur", + "start": 81.74, + "end": 82.02, + "confidence": 0.995 + }, + { + "text": "et", + "start": 82.02, + "end": 82.1, + "confidence": 0.953 + }, + { + "text": "qui", + "start": 82.1, + "end": 82.2, + "confidence": 0.987 + }, + { + "text": "amène", + "start": 82.2, + "end": 82.38, + "confidence": 0.971 + }, + { + "text": "aussi", + "start": 82.38, + "end": 82.54, + "confidence": 0.947 + }, + { + "text": "à", + "start": 82.54, + "end": 82.62, + "confidence": 0.959 + }, + { + "text": "des", + "start": 82.62, + "end": 82.7, + "confidence": 0.992 + }, + { + "text": "formes", + "start": 82.7, + "end": 82.88, + "confidence": 0.995 + }, + { + "text": "de", + "start": 82.88, + "end": 83.04, + "confidence": 0.998 + }, + { + "text": "rejet.", + "start": 83.04, + "end": 83.3, + "confidence": 0.802 + } + ] + }, + { + "id": 17, + "seek": 7134, + "start": 83.92, + "end": 87.7, + "text": " Donc à objet inédit, rapport inédit.", + "tokens": [ + 50989, + 7477, + 1531, + 14964, + 294, + 7811, + 270, + 11, + 18018, + 294, + 7811, + 270, + 13, + 51189 + ], + "temperature": 0.0, + "avg_logprob": -0.11261525396573341, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181636437773705, + "confidence": 0.901, + "words": [ + { + "text": "Donc", + "start": 83.92, + "end": 84.46, + "confidence": 0.984 + }, + { + "text": "à", + "start": 84.46, + "end": 84.98, + "confidence": 0.481 + }, + { + "text": "objet", + "start": 84.98, + "end": 85.46, + "confidence": 0.771 + }, + { + "text": "inédit,", + "start": 85.46, + "end": 86.2, + "confidence": 0.994 + }, + { + "text": "rapport", + "start": 86.54, + "end": 86.9, + "confidence": 0.986 + }, + { + "text": "inédit.", + "start": 86.9, + "end": 87.7, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 7134, + "start": 88.02, + "end": 94.92, + "text": " Et ce rapport, 
si j'en crois à Nicolas, serait caractérisé par un mélange de dépendance et de rejet.", + "tokens": [ + 51189, + 3790, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 21724, + 1531, + 38268, + 11, + 23139, + 1032, + 578, + 4198, + 22118, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 1030, + 368, + 319, + 7108, + 13, + 51539 + ], + "temperature": 0.0, + "avg_logprob": -0.11261525396573341, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181636437773705, + "confidence": 0.974, + "words": [ + { + "text": "Et", + "start": 88.02, + "end": 88.62, + "confidence": 0.989 + }, + { + "text": "ce", + "start": 88.62, + "end": 88.9, + "confidence": 0.985 + }, + { + "text": "rapport,", + "start": 88.9, + "end": 89.32, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.38, + "end": 89.56, + "confidence": 0.999 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.78, + "confidence": 0.997 + }, + { + "text": "crois", + "start": 89.78, + "end": 89.88, + "confidence": 0.988 + }, + { + "text": "à", + "start": 89.88, + "end": 90.06, + "confidence": 0.72 + }, + { + "text": "Nicolas,", + "start": 90.06, + "end": 90.24, + "confidence": 0.997 + }, + { + "text": "serait", + "start": 90.7, + "end": 91.0, + "confidence": 0.904 + }, + { + "text": "caractérisé", + "start": 91.0, + "end": 91.8, + "confidence": 0.994 + }, + { + "text": "par", + "start": 91.8, + "end": 92.22, + "confidence": 0.995 + }, + { + "text": "un", + "start": 92.22, + "end": 92.52, + "confidence": 0.996 + }, + { + "text": "mélange", + "start": 92.52, + "end": 93.04, + "confidence": 0.999 + }, + { + "text": "de", + "start": 93.04, + "end": 93.46, + "confidence": 0.998 + }, + { + "text": "dépendance", + "start": 93.46, + "end": 94.12, + "confidence": 0.937 + }, + { + "text": "et", + "start": 94.12, + "end": 94.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 94.54, + "end": 94.72, + "confidence": 0.999 + }, + { + "text": "rejet.", + "start": 94.72, + "end": 94.92, + 
"confidence": 0.988 + } + ] + }, + { + "id": 19, + "seek": 9484, + "start": 95.74, + "end": 102.82, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies", + "tokens": [ + 50389, + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 1111, + 25349, + 7512, + 1030, + 368, + 9580, + 8969, + 313, + 2680, + 3269, + 371, + 530, + 50739 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.932, + "words": [ + { + "text": "Bon,", + "start": 95.74, + "end": 96.0, + "confidence": 0.753 + }, + { + "text": "en", + "start": 96.34, + "end": 96.54, + "confidence": 0.992 + }, + { + "text": "vrai,", + "start": 96.54, + "end": 96.86, + "confidence": 0.991 + }, + { + "text": "il", + "start": 97.06, + "end": 97.18, + "confidence": 0.992 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.58, + "confidence": 0.996 + }, + { + "text": "remonter", + "start": 97.58, + "end": 98.06, + "confidence": 0.995 + }, + { + "text": "très", + "start": 98.06, + "end": 98.58, + "confidence": 0.995 + }, + { + "text": "très", + "start": 98.58, + "end": 98.84, + "confidence": 0.759 + }, + { + "text": "finement", + "start": 98.84, + "end": 99.38, + "confidence": 0.823 + }, + { + "text": "toute", + "start": 99.38, + "end": 99.7, + "confidence": 0.937 + }, + { + "text": "l'histoire", + "start": 99.7, + "end": 100.08, + "confidence": 0.996 + }, + { + "text": "des", + "start": 100.08, + "end": 100.26, + "confidence": 0.991 + }, + { + "text": "objets", + "start": 100.26, + "end": 100.52, + "confidence": 0.998 + }, + { + "text": "techniques", + "start": 100.52, + "end": 100.94, + "confidence": 0.982 + }, + { + "text": "et", + "start": 100.94, + "end": 101.54, + "confidence": 0.512 + }, + { + "text": "de", + 
"start": 101.54, + "end": 101.7, + "confidence": 0.984 + }, + { + "text": "leur", + "start": 101.7, + "end": 101.84, + "confidence": 0.82 + }, + { + "text": "insertion", + "start": 101.84, + "end": 102.34, + "confidence": 0.994 + }, + { + "text": "dans", + "start": 102.34, + "end": 102.5, + "confidence": 0.969 + }, + { + "text": "nos", + "start": 102.5, + "end": 102.66, + "confidence": 0.998 + }, + { + "text": "vies", + "start": 102.66, + "end": 102.82, + "confidence": 0.998 + } + ] + }, + { + "id": 20, + "seek": 9484, + "start": 102.86, + "end": 105.72, + "text": " pour déterminer si ce rapport est totalement inédit.", + "tokens": [ + 50739, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 294, + 7811, + 270, + 13, + 50889 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.995, + "words": [ + { + "text": "pour", + "start": 102.86, + "end": 103.1, + "confidence": 0.989 + }, + { + "text": "déterminer", + "start": 103.1, + "end": 103.64, + "confidence": 0.995 + }, + { + "text": "si", + "start": 103.64, + "end": 103.76, + "confidence": 0.986 + }, + { + "text": "ce", + "start": 103.76, + "end": 103.94, + "confidence": 0.991 + }, + { + "text": "rapport", + "start": 103.94, + "end": 104.26, + "confidence": 0.998 + }, + { + "text": "est", + "start": 104.26, + "end": 104.84, + "confidence": 0.997 + }, + { + "text": "totalement", + "start": 104.84, + "end": 105.3, + "confidence": 0.998 + }, + { + "text": "inédit.", + "start": 105.3, + "end": 105.72, + "confidence": 0.998 + } + ] + }, + { + "id": 21, + "seek": 9484, + "start": 106.14, + "end": 109.32, + "text": " Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment.", + "tokens": [ + 50889, + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 408, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13, + 51089 + ], + "temperature": 0.0, + 
"avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.931, + "words": [ + { + "text": "Mais", + "start": 106.14, + "end": 106.4, + "confidence": 0.975 + }, + { + "text": "j'ai", + "start": 106.4, + "end": 106.92, + "confidence": 0.957 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.38, + "confidence": 0.999 + }, + { + "text": "comme", + "start": 107.38, + "end": 107.58, + "confidence": 0.513 + }, + { + "text": "ça", + "start": 107.58, + "end": 107.82, + "confidence": 0.969 + }, + { + "text": "que", + "start": 107.82, + "end": 108.14, + "confidence": 0.974 + }, + { + "text": "Nicolas", + "start": 108.14, + "end": 108.48, + "confidence": 0.983 + }, + { + "text": "ne", + "start": 108.48, + "end": 108.6, + "confidence": 0.713 + }, + { + "text": "se", + "start": 108.6, + "end": 108.72, + "confidence": 0.992 + }, + { + "text": "trompe", + "start": 108.72, + "end": 108.88, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 108.88, + "end": 109.08, + "confidence": 0.999 + }, + { + "text": "vraiment.", + "start": 109.08, + "end": 109.32, + "confidence": 0.989 + } + ] + }, + { + "id": 22, + "seek": 9484, + "start": 109.94, + "end": 115.06, + "text": " Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 51089, + 8732, + 34081, + 631, + 1506, + 262, + 6000, + 11, + 1930, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13, + 51389 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.964, + "words": [ + { + "text": "Pour", + "start": 109.94, + "end": 110.1, + "confidence": 0.995 + }, + { + "text": "autant", + "start": 110.1, + "end": 110.24, + "confidence": 1.0 + }, + { + "text": "que", + "start": 110.24, + "end": 110.38, + 
"confidence": 0.984 + }, + { + "text": "je", + "start": 110.38, + "end": 110.52, + "confidence": 0.998 + }, + { + "text": "sache,", + "start": 110.52, + "end": 110.84, + "confidence": 0.953 + }, + { + "text": "il", + "start": 111.08, + "end": 111.16, + "confidence": 0.994 + }, + { + "text": "y", + "start": 111.16, + "end": 111.3, + "confidence": 0.994 + }, + { + "text": "a", + "start": 111.3, + "end": 111.32, + "confidence": 0.993 + }, + { + "text": "eu", + "start": 111.32, + "end": 111.62, + "confidence": 0.998 + }, + { + "text": "plein", + "start": 111.62, + "end": 111.9, + "confidence": 0.966 + }, + { + "text": "de", + "start": 111.9, + "end": 112.14, + "confidence": 0.997 + }, + { + "text": "discussions", + "start": 112.14, + "end": 112.66, + "confidence": 0.83 + }, + { + "text": "autour", + "start": 112.66, + "end": 113.02, + "confidence": 0.995 + }, + { + "text": "de", + "start": 113.02, + "end": 113.38, + "confidence": 0.996 + }, + { + "text": "la", + "start": 113.38, + "end": 113.52, + "confidence": 0.998 + }, + { + "text": "voiture", + "start": 113.52, + "end": 113.88, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 113.88, + "end": 114.06, + "confidence": 0.69 + }, + { + "text": "même", + "start": 114.06, + "end": 114.34, + "confidence": 0.995 + }, + { + "text": "du", + "start": 114.34, + "end": 114.64, + "confidence": 0.992 + }, + { + "text": "téléphone.", + "start": 114.64, + "end": 115.06, + "confidence": 0.999 + } + ] + }, + { + "id": 23, + "seek": 9484, + "start": 115.48, + "end": 117.7, + "text": " Mais la dépendance n'était pas du même ordre.", + "tokens": [ + 51389, + 6313, + 635, + 45768, + 719, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 51489 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.992, + "words": [ + { + "text": "Mais", + "start": 115.48, + "end": 115.78, + "confidence": 0.993 + }, + { 
+ "text": "la", + "start": 115.78, + "end": 116.04, + "confidence": 0.944 + }, + { + "text": "dépendance", + "start": 116.04, + "end": 116.42, + "confidence": 0.996 + }, + { + "text": "n'était", + "start": 116.42, + "end": 116.7, + "confidence": 0.994 + }, + { + "text": "pas", + "start": 116.7, + "end": 117.0, + "confidence": 0.998 + }, + { + "text": "du", + "start": 117.0, + "end": 117.2, + "confidence": 0.996 + }, + { + "text": "même", + "start": 117.2, + "end": 117.44, + "confidence": 0.998 + }, + { + "text": "ordre.", + "start": 117.44, + "end": 117.7, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 9484, + "start": 117.72, + "end": 119.78, + "text": " Donc le rejet non plus n'était pas du même ordre.", + "tokens": [ + 51489, + 7477, + 476, + 319, + 7108, + 2107, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 51589 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.978, + "words": [ + { + "text": "Donc", + "start": 117.72, + "end": 118.02, + "confidence": 0.959 + }, + { + "text": "le", + "start": 118.02, + "end": 118.4, + "confidence": 0.815 + }, + { + "text": "rejet", + "start": 118.4, + "end": 118.64, + "confidence": 0.999 + }, + { + "text": "non", + "start": 118.64, + "end": 118.82, + "confidence": 0.982 + }, + { + "text": "plus", + "start": 118.82, + "end": 118.94, + "confidence": 0.994 + }, + { + "text": "n'était", + "start": 118.94, + "end": 119.18, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 119.18, + "end": 119.3, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.3, + "end": 119.42, + "confidence": 0.995 + }, + { + "text": "même", + "start": 119.42, + "end": 119.58, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 119.58, + "end": 119.78, + "confidence": 0.999 + } + ] + }, + { + "id": 25, + "seek": 9484, + "start": 120.04, + "end": 123.1, + "text": " On peut 
adorer sa bagnole, en avoir besoin pour plein de choses.", + "tokens": [ + 51589, + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 1771, + 306, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13, + 51789 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.971, + "words": [ + { + "text": "On", + "start": 120.04, + "end": 120.16, + "confidence": 0.757 + }, + { + "text": "peut", + "start": 120.16, + "end": 120.36, + "confidence": 0.997 + }, + { + "text": "adorer", + "start": 120.36, + "end": 120.68, + "confidence": 0.991 + }, + { + "text": "sa", + "start": 120.68, + "end": 120.88, + "confidence": 0.98 + }, + { + "text": "bagnole,", + "start": 120.88, + "end": 121.34, + "confidence": 0.985 + }, + { + "text": "en", + "start": 121.42, + "end": 121.56, + "confidence": 0.971 + }, + { + "text": "avoir", + "start": 121.56, + "end": 121.8, + "confidence": 0.997 + }, + { + "text": "besoin", + "start": 121.8, + "end": 122.12, + "confidence": 0.999 + }, + { + "text": "pour", + "start": 122.12, + "end": 122.46, + "confidence": 0.989 + }, + { + "text": "plein", + "start": 122.46, + "end": 122.7, + "confidence": 0.987 + }, + { + "text": "de", + "start": 122.7, + "end": 122.78, + "confidence": 0.997 + }, + { + "text": "choses.", + "start": 122.78, + "end": 123.1, + "confidence": 0.99 + } + ] + }, + { + "id": 26, + "seek": 12334, + "start": 123.38, + "end": 126.44, + "text": " Le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 50389, + 1456, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13, + 50539 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.941, + "words": [ + { + "text": "Le", + "start": 123.38, + "end": 123.96, + "confidence": 0.646 + }, + { + "text": "soir,", + 
"start": 123.96, + "end": 124.54, + "confidence": 0.998 + }, + { + "text": "quand", + "start": 124.74, + "end": 124.94, + "confidence": 0.982 + }, + { + "text": "on", + "start": 124.94, + "end": 125.06, + "confidence": 0.996 + }, + { + "text": "va", + "start": 125.06, + "end": 125.16, + "confidence": 0.989 + }, + { + "text": "se", + "start": 125.16, + "end": 125.26, + "confidence": 0.988 + }, + { + "text": "coucher,", + "start": 125.26, + "end": 125.7, + "confidence": 0.99 + }, + { + "text": "on", + "start": 125.94, + "end": 126.04, + "confidence": 0.992 + }, + { + "text": "la", + "start": 126.04, + "end": 126.18, + "confidence": 0.852 + }, + { + "text": "laisse.", + "start": 126.18, + "end": 126.44, + "confidence": 0.999 + } + ] + }, + { + "id": 27, + "seek": 12334, + "start": 127.0, + "end": 130.3, + "text": " On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes.", + "tokens": [ + 50539, + 1282, + 408, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 6932, + 322, + 871, + 1609, + 7997, + 11, + 322, + 408, + 287, + 6, + 443, + 76, + 18832, + 1736, + 1609, + 13228, + 1521, + 279, + 13, + 50739 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.918, + "words": [ + { + "text": "On", + "start": 127.0, + "end": 127.34, + "confidence": 0.967 + }, + { + "text": "ne", + "start": 127.34, + "end": 127.46, + "confidence": 0.799 + }, + { + "text": "l'a", + "start": 127.46, + "end": 127.56, + "confidence": 0.929 + }, + { + "text": "pas", + "start": 127.56, + "end": 127.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 127.68, + "end": 127.86, + "confidence": 0.992 + }, + { + "text": "la", + "start": 127.86, + "end": 128.08, + "confidence": 0.996 + }, + { + "text": "main", + "start": 128.08, + "end": 128.26, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 128.26, + "end": 128.48, + "confidence": 0.936 + }, + 
{ + "text": "on", + "start": 128.48, + "end": 128.6, + "confidence": 0.993 + }, + { + "text": "est", + "start": 128.6, + "end": 128.74, + "confidence": 0.992 + }, + { + "text": "au", + "start": 128.74, + "end": 128.94, + "confidence": 0.976 + }, + { + "text": "lit,", + "start": 128.94, + "end": 129.1, + "confidence": 0.999 + }, + { + "text": "on", + "start": 129.16, + "end": 129.28, + "confidence": 0.432 + }, + { + "text": "ne", + "start": 129.28, + "end": 129.3, + "confidence": 0.948 + }, + { + "text": "l'emmène", + "start": 129.3, + "end": 129.58, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 129.58, + "end": 129.7, + "confidence": 0.997 + }, + { + "text": "au", + "start": 129.7, + "end": 129.88, + "confidence": 0.72 + }, + { + "text": "chiottes.", + "start": 129.88, + "end": 130.3, + "confidence": 0.886 + } + ] + }, + { + "id": 28, + "seek": 12334, + "start": 130.82, + "end": 136.88, + "text": " On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain.", + "tokens": [ + 50739, + 1282, + 45913, + 7418, + 45045, + 15797, + 971, + 1872, + 275, + 2851, + 1398, + 1956, + 8073, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251, + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13, + 51039 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.925, + "words": [ + { + "text": "On", + "start": 130.82, + "end": 131.06, + "confidence": 0.996 + }, + { + "text": "pouvait", + "start": 131.06, + "end": 131.26, + "confidence": 0.988 + }, + { + "text": "être", + "start": 131.26, + "end": 131.58, + "confidence": 0.995 + }, + { + "text": "énervé", + "start": 131.58, + "end": 132.22, + "confidence": 0.896 + }, + { + "text": "par", + "start": 132.22, + "end": 132.46, + "confidence": 0.992 + }, + { + "text": "son", + "start": 132.46, + "end": 
132.72, + "confidence": 0.998 + }, + { + "text": "môme", + "start": 132.72, + "end": 133.08, + "confidence": 0.758 + }, + { + "text": "qui", + "start": 133.08, + "end": 133.34, + "confidence": 0.917 + }, + { + "text": "occupait", + "start": 133.34, + "end": 133.74, + "confidence": 0.992 + }, + { + "text": "la", + "start": 133.74, + "end": 133.86, + "confidence": 0.987 + }, + { + "text": "ligne", + "start": 133.86, + "end": 134.06, + "confidence": 0.999 + }, + { + "text": "de", + "start": 134.06, + "end": 134.22, + "confidence": 0.995 + }, + { + "text": "téléphone", + "start": 134.22, + "end": 134.6, + "confidence": 0.992 + }, + { + "text": "pendant", + "start": 134.6, + "end": 134.92, + "confidence": 0.71 + }, + { + "text": "une", + "start": 134.92, + "end": 135.16, + "confidence": 0.783 + }, + { + "text": "heure", + "start": 135.16, + "end": 135.34, + "confidence": 0.995 + }, + { + "text": "chaque", + "start": 135.34, + "end": 135.58, + "confidence": 0.982 + }, + { + "text": "soir", + "start": 135.58, + "end": 135.8, + "confidence": 0.995 + }, + { + "text": "pour", + "start": 135.8, + "end": 135.98, + "confidence": 0.66 + }, + { + "text": "discuter", + "start": 135.98, + "end": 136.3, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 136.3, + "end": 136.5, + "confidence": 0.995 + }, + { + "text": "un", + "start": 136.5, + "end": 136.66, + "confidence": 0.997 + }, + { + "text": "copain.", + "start": 136.66, + "end": 136.88, + "confidence": 0.996 + } + ] + }, + { + "id": 29, + "seek": 12334, + "start": 137.26, + "end": 141.86, + "text": " Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui,", + "tokens": [ + 51039, + 6313, + 2788, + 408, + 725, + 15750, + 35235, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 1769, + 5698, + 275, + 2851, + 1398, + 14023, + 6, + 10556, + 11, + 51289 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 
1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.956, + "words": [ + { + "text": "Mais", + "start": 137.26, + "end": 137.5, + "confidence": 0.989 + }, + { + "text": "ça", + "start": 137.5, + "end": 137.68, + "confidence": 0.927 + }, + { + "text": "ne", + "start": 137.68, + "end": 137.88, + "confidence": 0.999 + }, + { + "text": "ressemblait", + "start": 137.88, + "end": 138.42, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 138.42, + "end": 138.78, + "confidence": 0.995 + }, + { + "text": "à", + "start": 138.78, + "end": 138.9, + "confidence": 0.989 + }, + { + "text": "ce", + "start": 138.9, + "end": 138.98, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 138.98, + "end": 139.12, + "confidence": 0.976 + }, + { + "text": "peut", + "start": 139.12, + "end": 139.48, + "confidence": 0.979 + }, + { + "text": "ressentir", + "start": 139.48, + "end": 140.1, + "confidence": 0.996 + }, + { + "text": "à", + "start": 140.1, + "end": 140.32, + "confidence": 0.575 + }, + { + "text": "voir", + "start": 140.32, + "end": 140.46, + "confidence": 0.731 + }, + { + "text": "ce", + "start": 140.46, + "end": 140.7, + "confidence": 0.985 + }, + { + "text": "même", + "start": 140.7, + "end": 140.94, + "confidence": 0.982 + }, + { + "text": "môme", + "start": 140.94, + "end": 141.28, + "confidence": 0.997 + }, + { + "text": "aujourd'hui,", + "start": 141.28, + "end": 141.86, + "confidence": 0.994 + } + ] + }, + { + "id": 30, + "seek": 12334, + "start": 142.16, + "end": 144.22, + "text": " continuellement avec son smartphone dans la main,", + "tokens": [ + 51289, + 2354, + 285, + 1712, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.961, + "words": [ + { + "text": "continuellement", + "start": 142.16, + "end": 142.94, + "confidence": 0.963 + }, + { + 
"text": "avec", + "start": 142.94, + "end": 143.2, + "confidence": 0.844 + }, + { + "text": "son", + "start": 143.2, + "end": 143.42, + "confidence": 0.994 + }, + { + "text": "smartphone", + "start": 143.42, + "end": 143.76, + "confidence": 0.972 + }, + { + "text": "dans", + "start": 143.76, + "end": 143.92, + "confidence": 0.964 + }, + { + "text": "la", + "start": 143.92, + "end": 144.02, + "confidence": 0.997 + }, + { + "text": "main,", + "start": 144.02, + "end": 144.22, + "confidence": 0.997 + } + ] + }, + { + "id": 31, + "seek": 12334, + "start": 144.34, + "end": 148.8, + "text": " comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate.", + "tokens": [ + 51414, + 5173, + 1511, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 15165, + 49523, + 454, + 391, + 716, + 11, + 5173, + 1511, + 476, + 48835, + 6759, + 439, + 1001, + 22284, + 7517, + 1193, + 601, + 6599, + 3397, + 526, + 4504, + 473, + 13, + 51639 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.973, + "words": [ + { + "text": "comme", + "start": 144.34, + "end": 144.56, + "confidence": 0.985 + }, + { + "text": "si", + "start": 144.56, + "end": 144.66, + "confidence": 0.972 + }, + { + "text": "c'était", + "start": 144.66, + "end": 144.86, + "confidence": 0.987 + }, + { + "text": "une", + "start": 144.86, + "end": 144.98, + "confidence": 0.983 + }, + { + "text": "sorte", + "start": 144.98, + "end": 145.14, + "confidence": 0.992 + }, + { + "text": "de", + "start": 145.14, + "end": 145.34, + "confidence": 0.989 + }, + { + "text": "pacemaker", + "start": 145.34, + "end": 145.82, + "confidence": 0.932 + }, + { + "text": "externe,", + "start": 145.82, + "end": 146.4, + "confidence": 0.99 + }, + { + "text": "comme", + "start": 146.44, + "end": 146.6, + "confidence": 0.724 + }, + { + "text": "si", + "start": 146.6, + "end": 146.74, + "confidence": 
0.993 + }, + { + "text": "le", + "start": 146.74, + "end": 147.0, + "confidence": 0.996 + }, + { + "text": "lâcher", + "start": 147.0, + "end": 147.36, + "confidence": 0.967 + }, + { + "text": "allait", + "start": 147.36, + "end": 147.58, + "confidence": 0.991 + }, + { + "text": "entraîner", + "start": 147.58, + "end": 147.88, + "confidence": 0.982 + }, + { + "text": "sa", + "start": 147.88, + "end": 148.02, + "confidence": 0.999 + }, + { + "text": "mort", + "start": 148.02, + "end": 148.3, + "confidence": 0.998 + }, + { + "text": "immédiate.", + "start": 148.3, + "end": 148.8, + "confidence": 0.998 + } + ] + }, + { + "id": 32, + "seek": 12334, + "start": 148.94, + "end": 152.02, + "text": " Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi.", + "tokens": [ + 51639, + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 2851, + 1398, + 11, + 2420, + 269, + 6, + 377, + 24724, + 1323, + 712, + 2016, + 4666, + 6212, + 13, + 51839 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.973, + "words": [ + { + "text": "Bon,", + "start": 148.94, + "end": 149.24, + "confidence": 0.804 + }, + { + "text": "je", + "start": 149.26, + "end": 149.34, + "confidence": 0.933 + }, + { + "text": "dis", + "start": 149.34, + "end": 149.46, + "confidence": 0.982 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.992 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.78, + "confidence": 0.997 + }, + { + "text": "le", + "start": 149.78, + "end": 149.9, + "confidence": 0.994 + }, + { + "text": "môme,", + "start": 149.9, + "end": 150.1, + "confidence": 0.999 + }, + { + "text": "mais", + "start": 150.32, + "end": 150.5, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 150.5, + "end": 150.84, + "confidence": 0.979 + }, + { + "text": "évidemment", + "start": 150.84, + "end": 151.18, + "confidence": 0.979 
+ }, + { + "text": "valable", + "start": 151.18, + "end": 151.48, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 151.48, + "end": 151.64, + "confidence": 0.996 + }, + { + "text": "nous", + "start": 151.64, + "end": 151.78, + "confidence": 0.998 + }, + { + "text": "aussi.", + "start": 151.78, + "end": 152.02, + "confidence": 0.997 + } + ] + }, + { + "id": 33, + "seek": 15284, + "start": 153.14, + "end": 154.68, + "text": " Donc, rapport inédit, d'accord.", + "tokens": [ + 50389, + 7477, + 11, + 18018, + 294, + 7811, + 270, + 11, + 274, + 6, + 19947, + 13, + 50489 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.814, + "words": [ + { + "text": "Donc,", + "start": 153.14, + "end": 153.24, + "confidence": 0.233 + }, + { + "text": "rapport", + "start": 153.26, + "end": 153.62, + "confidence": 0.854 + }, + { + "text": "inédit,", + "start": 153.62, + "end": 154.18, + "confidence": 0.992 + }, + { + "text": "d'accord.", + "start": 154.28, + "end": 154.68, + "confidence": 0.997 + } + ] + }, + { + "id": 34, + "seek": 15284, + "start": 155.64, + "end": 158.52, + "text": " Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais ?", + "tokens": [ + 50489, + 6313, + 19934, + 257, + 12, + 83, + 12, + 266, + 287, + 6, + 36107, + 421, + 6, + 266, + 297, + 6, + 268, + 1333, + 4271, + 14540, + 2506, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.958, + "words": [ + { + "text": "Mais", + "start": 155.64, + "end": 155.88, + "confidence": 0.991 + }, + { + "text": "pourquoi", + "start": 155.88, + "end": 156.36, + "confidence": 0.992 + }, + { + "text": "a-t-on", + "start": 156.36, + "end": 156.7, + "confidence": 0.95 + }, + { + "text": "l'impression", + "start": 156.7, + "end": 157.06, + "confidence": 0.998 + }, + { + "text": 
"qu'on", + "start": 157.06, + "end": 157.28, + "confidence": 0.99 + }, + { + "text": "n'en", + "start": 157.28, + "end": 157.5, + "confidence": 0.857 + }, + { + "text": "sortira", + "start": 157.5, + "end": 157.86, + "confidence": 0.974 + }, + { + "text": "jamais ?", + "start": 157.86, + "end": 158.52, + "confidence": 0.997 + } + ] + }, + { + "id": 35, + "seek": 15284, + "start": 159.12, + "end": 165.38, + "text": " Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux ?", + "tokens": [ + 50639, + 4410, + 12, + 384, + 421, + 6, + 388, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 8603, + 484, + 388, + 3551, + 303, + 3409, + 2449, + 1030, + 1026, + 14923, + 1925, + 11, + 1030, + 1026, + 14923, + 1925, + 6992, + 631, + 3551, + 303, + 3409, + 2449, + 2506, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.959, + "words": [ + { + "text": "Est-ce", + "start": 159.12, + "end": 159.34, + "confidence": 0.987 + }, + { + "text": "qu'il", + "start": 159.34, + "end": 159.46, + "confidence": 0.993 + }, + { + "text": "faut", + "start": 159.46, + "end": 159.64, + "confidence": 0.998 + }, + { + "text": "en", + "start": 159.64, + "end": 159.88, + "confidence": 0.961 + }, + { + "text": "remettre", + "start": 159.88, + "end": 160.14, + "confidence": 0.999 + }, + { + "text": "la", + "start": 160.14, + "end": 160.46, + "confidence": 0.995 + }, + { + "text": "faute", + "start": 160.46, + "end": 160.64, + "confidence": 0.986 + }, + { + "text": "sur", + "start": 160.64, + "end": 161.06, + "confidence": 0.982 + }, + { + "text": "les", + "start": 161.06, + "end": 161.3, + "confidence": 0.991 + }, + { + "text": "gens", + "start": 161.3, + "end": 161.46, + "confidence": 0.998 + }, + { + "text": "qui", + "start": 161.46, 
+ "end": 161.56, + "confidence": 0.724 + }, + { + "text": "ont", + "start": 161.56, + "end": 161.82, + "confidence": 0.996 + }, + { + "text": "créé", + "start": 161.82, + "end": 162.28, + "confidence": 0.985 + }, + { + "text": "cet", + "start": 162.28, + "end": 162.48, + "confidence": 0.817 + }, + { + "text": "outil", + "start": 162.48, + "end": 162.78, + "confidence": 0.993 + }, + { + "text": "merveilleux", + "start": 162.78, + "end": 163.36, + "confidence": 0.981 + }, + { + "text": "et", + "start": 163.36, + "end": 163.5, + "confidence": 0.983 + }, + { + "text": "diabolique,", + "start": 163.5, + "end": 163.84, + "confidence": 0.991 + }, + { + "text": "et", + "start": 163.86, + "end": 164.0, + "confidence": 0.554 + }, + { + "text": "diabolique", + "start": 164.0, + "end": 164.4, + "confidence": 0.95 + }, + { + "text": "parce", + "start": 164.4, + "end": 164.66, + "confidence": 0.927 + }, + { + "text": "que", + "start": 164.66, + "end": 164.86, + "confidence": 0.978 + }, + { + "text": "merveilleux ?", + "start": 164.86, + "end": 165.38, + "confidence": 0.997 + } + ] + }, + { + "id": 36, + "seek": 15284, + "start": 166.84, + "end": 168.82, + "text": " Les économistes parlent de dépendance du sentier.", + "tokens": [ + 51039, + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 2279, + 811, + 13, + 51139 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.986, + "words": [ + { + "text": "Les", + "start": 166.84, + "end": 167.08, + "confidence": 0.945 + }, + { + "text": "économistes", + "start": 167.08, + "end": 167.46, + "confidence": 0.997 + }, + { + "text": "parlent", + "start": 167.46, + "end": 167.72, + "confidence": 0.993 + }, + { + "text": "de", + "start": 167.72, + "end": 167.88, + "confidence": 0.992 + }, + { + "text": "dépendance", + "start": 167.88, + "end": 168.34, + "confidence": 0.986 + }, + { + "text": "du", + 
"start": 168.34, + "end": 168.52, + "confidence": 0.996 + }, + { + "text": "sentier.", + "start": 168.52, + "end": 168.82, + "confidence": 0.983 + } + ] + }, + { + "id": 37, + "seek": 15284, + "start": 168.98, + "end": 177.38, + "text": " C'est l'idée qu'on met sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "tokens": [ + 51139, + 383, + 6, + 377, + 287, + 6, + 34281, + 421, + 6, + 266, + 1131, + 1022, + 517, + 2279, + 811, + 1956, + 257, + 8862, + 4823, + 455, + 2081, + 11, + 12703, + 40005, + 9020, + 518, + 465, + 8368, + 394, + 30677, + 11, + 12703, + 465, + 40763, + 29492, + 730, + 4232, + 279, + 11, + 465, + 40763, + 29492, + 2251, + 6358, + 42379, + 13, + 51589 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.907, + "words": [ + { + "text": "C'est", + "start": 168.98, + "end": 169.2, + "confidence": 0.996 + }, + { + "text": "l'idée", + "start": 169.2, + "end": 169.38, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 169.38, + "end": 169.74, + "confidence": 0.699 + }, + { + "text": "met", + "start": 169.74, + "end": 169.88, + "confidence": 0.648 + }, + { + "text": "sur", + "start": 169.88, + "end": 170.1, + "confidence": 0.995 + }, + { + "text": "un", + "start": 170.1, + "end": 170.52, + "confidence": 0.997 + }, + { + "text": "sentier", + "start": 170.52, + "end": 170.84, + "confidence": 0.991 + }, + { + "text": "qui", + "start": 170.84, + "end": 170.9, + "confidence": 0.955 + }, + { + "text": "a", + "start": 170.9, + "end": 171.0, + "confidence": 0.969 + }, + { + "text": "été", + "start": 171.0, + "end": 171.14, + "confidence": 0.994 + }, + { + "text": "établi,", + "start": 171.14, + "end": 171.5, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 171.76, + "end": 172.1, + "confidence": 0.527 + }, + { + "text": "volontairement", 
+ "start": 172.1, + "end": 172.7, + "confidence": 0.987 + }, + { + "text": "en", + "start": 172.7, + "end": 172.86, + "confidence": 0.927 + }, + { + "text": "marchant", + "start": 172.86, + "end": 173.1, + "confidence": 0.997 + }, + { + "text": "dessus,", + "start": 173.1, + "end": 173.48, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 173.86, + "end": 174.28, + "confidence": 0.748 + }, + { + "text": "en", + "start": 174.28, + "end": 175.16, + "confidence": 0.967 + }, + { + "text": "définissant", + "start": 175.16, + "end": 175.54, + "confidence": 0.981 + }, + { + "text": "des", + "start": 175.54, + "end": 175.74, + "confidence": 0.99 + }, + { + "text": "bornes,", + "start": 175.74, + "end": 175.98, + "confidence": 0.977 + }, + { + "text": "en", + "start": 176.04, + "end": 176.14, + "confidence": 0.674 + }, + { + "text": "définissant", + "start": 176.14, + "end": 176.66, + "confidence": 0.996 + }, + { + "text": "une", + "start": 176.66, + "end": 176.94, + "confidence": 0.983 + }, + { + "text": "signalétique.", + "start": 176.94, + "end": 177.38, + "confidence": 0.686 + } + ] + } + ], + "language": "fr", + "language_probs": { + "en": 0.0010143449762836099, + "zh": 5.722542846342549e-05, + "de": 9.583455539541319e-05, + "es": 0.00010201535042142496, + "ru": 3.1112926080822945e-05, + "ko": 5.633822729578242e-05, + "fr": 0.9971169233322144, + "ja": 0.00011926802835660055, + "pt": 0.0002485759323462844, + "tr": 3.1112926080822945e-05, + "pl": 0.0001630200567888096, + "ca": 4.259038988152497e-08, + "nl": 1.564456215419341e-05, + "ar": 9.434877574676648e-05, + "sv": 2.0521199530776357e-06, + "it": 0.0006971484399400651, + "id": 2.9688158974749967e-05, + "hi": 3.2284376629831968e-06, + "fi": 8.965047868514375e-07, + "vi": 5.546478496398777e-05, + "he": 5.610183393400803e-07, + "uk": 1.0875836409240947e-07, + "el": 4.624523171514738e-06, + "ms": 2.939525757028605e-06, + "cs": 1.2841837815358303e-06, + "ro": 3.4907734516309574e-06, + "da": 7.956932535080341e-08, 
+ "hu": 5.3227936405164655e-06, + "ta": 2.3386724024021532e-07, + "no": 1.1047105630268561e-07, + "th": 6.222976480785292e-06, + "ur": 2.0003723477657331e-07, + "hr": 1.4044819529601682e-08, + "bg": 1.8606362672812793e-08, + "lt": 2.3471247168060927e-09, + "la": 5.845945452165324e-06, + "mi": 1.8500422527267801e-07, + "ml": 6.357170718729321e-07, + "cy": 2.3253562630998204e-06, + "sk": 3.2655183446195224e-08, + "te": 2.793145803536845e-08, + "fa": 1.1577267144957659e-07, + "lv": 2.3058828457767078e-10, + "bn": 5.300459804402635e-08, + "sr": 2.1661765436942204e-10, + "az": 5.115824985857387e-10, + "sl": 3.422233874061931e-08, + "kn": 2.2879385885854475e-10, + "et": 1.083020118031186e-09, + "mk": 1.635116070319853e-10, + "br": 5.406615400715964e-06, + "eu": 3.8177777383907596e-08, + "is": 9.119943689128718e-10, + "hy": 2.239642027390687e-09, + "ne": 5.28932808663285e-09, + "mn": 6.634303328922897e-09, + "bs": 3.3884703931619242e-09, + "kk": 1.0393429172861346e-10, + "sq": 3.328931019730419e-10, + "sw": 3.530867331846821e-08, + "gl": 1.5099602990176209e-07, + "mr": 1.1710239444795434e-09, + "pa": 1.961096174341037e-09, + "si": 6.80592506796529e-08, + "km": 3.7744257497251965e-06, + "sn": 1.737953709834983e-07, + "yo": 1.9047600829935618e-08, + "so": 1.3956514763324712e-11, + "af": 7.679762936696477e-10, + "oc": 2.734184931796335e-07, + "ka": 2.9087872735478193e-11, + "be": 2.2796992737994515e-08, + "tg": 3.669329163430435e-12, + "sd": 1.1528690224693605e-09, + "gu": 1.376905534034023e-10, + "am": 8.416855407089585e-11, + "yi": 9.784271171042747e-10, + "lo": 2.8091404757901728e-09, + "uz": 5.215168574768114e-12, + "fo": 1.6905705724212794e-09, + "ht": 4.751301574401623e-08, + "ps": 2.0076020845749554e-09, + "tk": 2.449444466701145e-11, + "nn": 1.7182190276798792e-05, + "mt": 3.048383073744887e-11, + "sa": 2.3386724024021532e-07, + "lb": 6.936016431341452e-12, + "my": 3.0676705620180655e-08, + "bo": 1.2589695508324894e-08, + "tl": 3.4027485185106343e-07, + "mg": 
2.9777669913189087e-11, + "as": 2.716994129681183e-10, + "tt": 5.123530722947467e-13, + "haw": 8.965047868514375e-07, + "ln": 1.0415303064448267e-09, + "ha": 8.940879286933878e-12, + "ba": 2.775584449776769e-11, + "jw": 2.3991710804693867e-06, + "su": 2.4252787278467025e-12 + } +} \ No newline at end of file diff --git a/tests/expected/medium_fr.cpu/radio_short.mp3.words.json b/tests/expected/medium_fr.cpu/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..55a81cf3b19c05b84cbe3d2f0fc3db1d81a0f42e --- /dev/null +++ b/tests/expected/medium_fr.cpu/radio_short.mp3.words.json @@ -0,0 +1,1977 @@ +{ + "text": " Le plus important au poker ce ne sont pas les cartes, c'est ce que vous en faites. Winamax, la référence du poker en ligne. Bonsoir à toutes et tous, vous êtes sur BFM TV, nous sommes en direct, c'est bien sûr BFM story avec tout ce qui fait l'actualité. Durant 60 minutes ce sont des gros plans, des analyses, des réactions que nous vous proposons. Comment Eric Verth peut-il encore soutenir la réforme des retraites alors qu'il est englué dans sa propre affaire, l'affaire Verth-Bettancourt? Question posée par les leaders de la CFDT et la CGT. Réponse de Nicolas Sarkozy, Eric Verth portera le débat sur les retraites, on en parle dans BFM story avec le numéro de la CFDT. Et puis il y a une bataille qui a démarré, celle entre Marine Le Pen et Bruno Gognich, la bataille de la succession de Jean-Marie Le Pen à la tête du Front National. La tournée de campagne de Marine Le Pen commence aujourd'hui dans le Var, Marine Le Pen sera en direct dans BFM story. Restez avec nous Marine Le Pen dans moins de 3 minutes, à tout de suite. Musique L'actualité c'est aussi aujourd'hui un dernier adieu, dernier adieu à Laurent Fignon, c'était au cimetière du Père Lachaise à Paris. 
L'ancien double vainqueur du Tour de France, vaincu par le cancer à 50 ans, a été incinéré en petit comité aujourd'hui.", + "segments": [ + { + "id": 0, + "seek": 3000, + "start": 30.86, + "end": 34.26, + "text": " Le plus important au poker ce ne sont pas les cartes, c'est ce que vous en faites.", + "tokens": [ + 1456, + 1804, + 1021, + 1609, + 36863, + 1769, + 408, + 4900, + 1736, + 1512, + 5467, + 279, + 11, + 269, + 6, + 377, + 1769, + 631, + 2630, + 465, + 29902, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.1935427300283842, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5593915581703186, + "confidence": 0.92, + "words": [ + { + "text": "Le", + "start": 30.86, + "end": 31.02, + "confidence": 0.535 + }, + { + "text": "plus", + "start": 31.02, + "end": 31.12, + "confidence": 0.99 + }, + { + "text": "important", + "start": 31.12, + "end": 31.48, + "confidence": 0.993 + }, + { + "text": "au", + "start": 31.48, + "end": 31.64, + "confidence": 0.946 + }, + { + "text": "poker", + "start": 31.64, + "end": 31.84, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 31.84, + "end": 32.02, + "confidence": 0.581 + }, + { + "text": "ne", + "start": 32.02, + "end": 32.06, + "confidence": 0.961 + }, + { + "text": "sont", + "start": 32.06, + "end": 32.16, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 32.16, + "end": 32.28, + "confidence": 0.991 + }, + { + "text": "les", + "start": 32.28, + "end": 32.42, + "confidence": 0.969 + }, + { + "text": "cartes,", + "start": 32.42, + "end": 33.56, + "confidence": 0.996 + }, + { + "text": "c'est", + "start": 33.56, + "end": 33.62, + "confidence": 0.95 + }, + { + "text": "ce", + "start": 33.62, + "end": 33.66, + "confidence": 0.99 + }, + { + "text": "que", + "start": 33.66, + "end": 33.7, + "confidence": 0.991 + }, + { + "text": "vous", + "start": 33.7, + "end": 33.86, + "confidence": 0.991 + }, + { + "text": "en", + "start": 33.86, + "end": 34.04, + "confidence": 0.982 + }, + { + "text": 
"faites.", + "start": 34.04, + "end": 34.26, + "confidence": 0.894 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 36.18, + "end": 38.76, + "text": " Winamax, la référence du poker en ligne.", + "tokens": [ + 10427, + 2404, + 87, + 11, + 635, + 30170, + 41635, + 1581, + 36863, + 465, + 34207, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.1935427300283842, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5593915581703186, + "confidence": 0.967, + "words": [ + { + "text": "Winamax,", + "start": 36.18, + "end": 37.28, + "confidence": 0.905 + }, + { + "text": "la", + "start": 37.28, + "end": 37.32, + "confidence": 0.988 + }, + { + "text": "référence", + "start": 37.32, + "end": 37.78, + "confidence": 0.998 + }, + { + "text": "du", + "start": 37.78, + "end": 37.96, + "confidence": 0.992 + }, + { + "text": "poker", + "start": 37.96, + "end": 38.22, + "confidence": 0.998 + }, + { + "text": "en", + "start": 38.22, + "end": 38.4, + "confidence": 0.993 + }, + { + "text": "ligne.", + "start": 38.4, + "end": 38.76, + "confidence": 0.993 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 44.94, + "end": 51.38, + "text": " Bonsoir à toutes et tous, vous êtes sur BFM TV, nous sommes en direct, c'est bien sûr BFM story avec tout ce qui fait l'actualité.", + "tokens": [ + 7368, + 539, + 347, + 1531, + 14437, + 1030, + 8317, + 11, + 2630, + 18935, + 1022, + 363, + 37, + 44, + 3558, + 11, + 4666, + 25232, + 465, + 2047, + 11, + 269, + 6, + 377, + 3610, + 18143, + 363, + 37, + 44, + 1657, + 4163, + 3486, + 1769, + 1956, + 3887, + 287, + 6, + 578, + 901, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.1935427300283842, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5593915581703186, + "confidence": 0.907, + "words": [ + { + "text": "Bonsoir", + "start": 44.94, + "end": 45.4, + "confidence": 0.955 + }, + { + "text": "à", + "start": 45.4, + "end": 45.84, + "confidence": 0.54 + }, + { + "text": "toutes", + "start": 45.84, + 
"end": 46.06, + "confidence": 0.917 + }, + { + "text": "et", + "start": 46.06, + "end": 46.14, + "confidence": 0.988 + }, + { + "text": "tous,", + "start": 46.14, + "end": 46.7, + "confidence": 0.981 + }, + { + "text": "vous", + "start": 46.7, + "end": 46.74, + "confidence": 0.97 + }, + { + "text": "êtes", + "start": 46.74, + "end": 46.9, + "confidence": 0.995 + }, + { + "text": "sur", + "start": 46.9, + "end": 47.04, + "confidence": 0.989 + }, + { + "text": "BFM", + "start": 47.04, + "end": 47.56, + "confidence": 0.987 + }, + { + "text": "TV,", + "start": 47.56, + "end": 47.88, + "confidence": 0.853 + }, + { + "text": "nous", + "start": 47.88, + "end": 47.92, + "confidence": 0.829 + }, + { + "text": "sommes", + "start": 47.92, + "end": 48.14, + "confidence": 0.988 + }, + { + "text": "en", + "start": 48.14, + "end": 48.2, + "confidence": 0.986 + }, + { + "text": "direct,", + "start": 48.2, + "end": 48.74, + "confidence": 0.998 + }, + { + "text": "c'est", + "start": 48.74, + "end": 48.8, + "confidence": 0.939 + }, + { + "text": "bien", + "start": 48.8, + "end": 48.94, + "confidence": 0.971 + }, + { + "text": "sûr", + "start": 48.94, + "end": 49.2, + "confidence": 0.906 + }, + { + "text": "BFM", + "start": 49.2, + "end": 49.84, + "confidence": 0.95 + }, + { + "text": "story", + "start": 49.84, + "end": 50.04, + "confidence": 0.356 + }, + { + "text": "avec", + "start": 50.04, + "end": 50.24, + "confidence": 0.553 + }, + { + "text": "tout", + "start": 50.24, + "end": 50.42, + "confidence": 0.93 + }, + { + "text": "ce", + "start": 50.42, + "end": 50.46, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 50.46, + "end": 50.56, + "confidence": 0.952 + }, + { + "text": "fait", + "start": 50.56, + "end": 50.72, + "confidence": 0.961 + }, + { + "text": "l'actualité.", + "start": 50.72, + "end": 51.38, + "confidence": 0.994 + } + ] + }, + { + "id": 3, + "seek": 3000, + "start": 51.5, + "end": 56.11, + "text": " Durant 60 minutes ce sont des gros plans, des analyses, 
des réactions que nous vous proposons.", + "tokens": [ + 13710, + 394, + 4060, + 2077, + 1769, + 4900, + 730, + 18638, + 5482, + 11, + 730, + 37560, + 11, + 730, + 3960, + 12299, + 631, + 4666, + 2630, + 7532, + 892, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.1935427300283842, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5593915581703186, + "confidence": 0.947, + "words": [ + { + "text": "Durant", + "start": 51.5, + "end": 52.3, + "confidence": 0.935 + }, + { + "text": "60", + "start": 52.3, + "end": 52.7, + "confidence": 0.922 + }, + { + "text": "minutes", + "start": 52.7, + "end": 53.08, + "confidence": 0.912 + }, + { + "text": "ce", + "start": 53.08, + "end": 53.26, + "confidence": 0.752 + }, + { + "text": "sont", + "start": 53.26, + "end": 53.58, + "confidence": 0.979 + }, + { + "text": "des", + "start": 53.58, + "end": 53.86, + "confidence": 0.99 + }, + { + "text": "gros", + "start": 53.86, + "end": 54.02, + "confidence": 0.969 + }, + { + "text": "plans,", + "start": 54.02, + "end": 54.32, + "confidence": 0.958 + }, + { + "text": "des", + "start": 54.32, + "end": 54.44, + "confidence": 0.986 + }, + { + "text": "analyses,", + "start": 54.44, + "end": 54.78, + "confidence": 0.913 + }, + { + "text": "des", + "start": 54.78, + "end": 54.86, + "confidence": 0.995 + }, + { + "text": "réactions", + "start": 54.86, + "end": 55.22, + "confidence": 0.996 + }, + { + "text": "que", + "start": 55.22, + "end": 55.4, + "confidence": 0.882 + }, + { + "text": "nous", + "start": 55.4, + "end": 55.54, + "confidence": 0.947 + }, + { + "text": "vous", + "start": 55.54, + "end": 55.64, + "confidence": 0.99 + }, + { + "text": "proposons.", + "start": 55.64, + "end": 56.11, + "confidence": 0.991 + } + ] + }, + { + "id": 4, + "seek": 5600, + "start": 56.11, + "end": 63.98, + "text": " Comment Eric Verth peut-il encore soutenir la réforme des retraites alors qu'il est englué dans sa propre affaire, l'affaire Verth-Bettancourt?", + "tokens": [ + 16328, + 
9336, + 4281, + 392, + 5977, + 12, + 388, + 10122, + 29350, + 268, + 347, + 635, + 3960, + 44562, + 730, + 49356, + 3324, + 11246, + 421, + 6, + 388, + 871, + 1741, + 2781, + 526, + 2680, + 601, + 35221, + 2096, + 9020, + 11, + 287, + 6, + 2518, + 9020, + 4281, + 392, + 12, + 33, + 3093, + 4463, + 33403, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.12757631117297757, + "compression_ratio": 1.5705329153605017, + "no_speech_prob": 6.333013880066574e-05, + "confidence": 0.858, + "words": [ + { + "text": "Comment", + "start": 56.11, + "end": 56.94, + "confidence": 0.966 + }, + { + "text": "Eric", + "start": 56.94, + "end": 57.2, + "confidence": 0.888 + }, + { + "text": "Verth", + "start": 57.2, + "end": 57.36, + "confidence": 0.376 + }, + { + "text": "peut-il", + "start": 57.36, + "end": 57.8, + "confidence": 0.951 + }, + { + "text": "encore", + "start": 57.8, + "end": 58.36, + "confidence": 0.971 + }, + { + "text": "soutenir", + "start": 58.36, + "end": 58.62, + "confidence": 0.997 + }, + { + "text": "la", + "start": 58.62, + "end": 58.72, + "confidence": 0.98 + }, + { + "text": "réforme", + "start": 58.72, + "end": 59.18, + "confidence": 0.996 + }, + { + "text": "des", + "start": 59.18, + "end": 59.38, + "confidence": 0.994 + }, + { + "text": "retraites", + "start": 59.38, + "end": 59.78, + "confidence": 0.988 + }, + { + "text": "alors", + "start": 59.78, + "end": 59.94, + "confidence": 0.508 + }, + { + "text": "qu'il", + "start": 59.94, + "end": 60.72, + "confidence": 0.965 + }, + { + "text": "est", + "start": 60.72, + "end": 60.86, + "confidence": 0.975 + }, + { + "text": "englué", + "start": 60.86, + "end": 61.74, + "confidence": 0.945 + }, + { + "text": "dans", + "start": 61.74, + "end": 61.92, + "confidence": 0.97 + }, + { + "text": "sa", + "start": 61.92, + "end": 62.42, + "confidence": 0.871 + }, + { + "text": "propre", + "start": 62.42, + "end": 62.74, + "confidence": 0.992 + }, + { + "text": "affaire,", + "start": 62.74, + "end": 62.98, + 
"confidence": 0.993 + }, + { + "text": "l'affaire", + "start": 62.98, + "end": 63.24, + "confidence": 0.903 + }, + { + "text": "Verth-Bettancourt?", + "start": 63.24, + "end": 63.98, + "confidence": 0.714 + } + ] + }, + { + "id": 5, + "seek": 5600, + "start": 64.0, + "end": 67.18, + "text": " Question posée par les leaders de la CFDT et la CGT.", + "tokens": [ + 14464, + 1366, + 3856, + 971, + 1512, + 3523, + 368, + 635, + 21792, + 35, + 51, + 1030, + 635, + 38007, + 51, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.12757631117297757, + "compression_ratio": 1.5705329153605017, + "no_speech_prob": 6.333013880066574e-05, + "confidence": 0.896, + "words": [ + { + "text": "Question", + "start": 64.0, + "end": 64.3, + "confidence": 0.639 + }, + { + "text": "posée", + "start": 64.3, + "end": 64.76, + "confidence": 0.977 + }, + { + "text": "par", + "start": 64.76, + "end": 64.94, + "confidence": 0.989 + }, + { + "text": "les", + "start": 64.94, + "end": 65.08, + "confidence": 0.966 + }, + { + "text": "leaders", + "start": 65.08, + "end": 65.66, + "confidence": 0.977 + }, + { + "text": "de", + "start": 65.66, + "end": 65.9, + "confidence": 0.988 + }, + { + "text": "la", + "start": 65.9, + "end": 66.1, + "confidence": 0.967 + }, + { + "text": "CFDT", + "start": 66.1, + "end": 66.54, + "confidence": 0.994 + }, + { + "text": "et", + "start": 66.54, + "end": 66.62, + "confidence": 0.586 + }, + { + "text": "la", + "start": 66.62, + "end": 66.66, + "confidence": 0.627 + }, + { + "text": "CGT.", + "start": 66.66, + "end": 67.18, + "confidence": 0.987 + } + ] + }, + { + "id": 6, + "seek": 5600, + "start": 67.44, + "end": 76.42, + "text": " Réponse de Nicolas Sarkozy, Eric Verth portera le débat sur les retraites, on en parle dans BFM story avec le numéro de la CFDT.", + "tokens": [ + 41587, + 3739, + 368, + 38268, + 318, + 809, + 78, + 1229, + 11, + 9336, + 4281, + 392, + 1515, + 23833, + 476, + 2795, + 11980, + 1022, + 1512, + 49356, + 3324, + 11, + 322, + 465, + 18508, + 
2680, + 363, + 37, + 44, + 1657, + 4163, + 476, + 49525, + 368, + 635, + 21792, + 35, + 51, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.12757631117297757, + "compression_ratio": 1.5705329153605017, + "no_speech_prob": 6.333013880066574e-05, + "confidence": 0.968, + "words": [ + { + "text": "Réponse", + "start": 67.44, + "end": 67.94, + "confidence": 0.957 + }, + { + "text": "de", + "start": 67.94, + "end": 68.1, + "confidence": 0.966 + }, + { + "text": "Nicolas", + "start": 68.1, + "end": 68.36, + "confidence": 0.934 + }, + { + "text": "Sarkozy,", + "start": 68.36, + "end": 69.06, + "confidence": 0.988 + }, + { + "text": "Eric", + "start": 69.06, + "end": 69.24, + "confidence": 0.962 + }, + { + "text": "Verth", + "start": 69.24, + "end": 69.8, + "confidence": 0.988 + }, + { + "text": "portera", + "start": 69.8, + "end": 70.64, + "confidence": 0.966 + }, + { + "text": "le", + "start": 70.64, + "end": 71.0, + "confidence": 0.99 + }, + { + "text": "débat", + "start": 71.0, + "end": 71.4, + "confidence": 0.996 + }, + { + "text": "sur", + "start": 71.4, + "end": 71.66, + "confidence": 0.99 + }, + { + "text": "les", + "start": 71.66, + "end": 71.82, + "confidence": 0.997 + }, + { + "text": "retraites,", + "start": 71.82, + "end": 72.34, + "confidence": 0.996 + }, + { + "text": "on", + "start": 72.34, + "end": 72.46, + "confidence": 0.842 + }, + { + "text": "en", + "start": 72.46, + "end": 72.74, + "confidence": 0.948 + }, + { + "text": "parle", + "start": 72.74, + "end": 72.92, + "confidence": 0.996 + }, + { + "text": "dans", + "start": 72.92, + "end": 73.42, + "confidence": 0.983 + }, + { + "text": "BFM", + "start": 73.42, + "end": 74.6, + "confidence": 0.992 + }, + { + "text": "story", + "start": 74.6, + "end": 74.7, + "confidence": 0.756 + }, + { + "text": "avec", + "start": 74.7, + "end": 75.08, + "confidence": 0.837 + }, + { + "text": "le", + "start": 75.08, + "end": 75.26, + "confidence": 0.984 + }, + { + "text": "numéro", + "start": 75.26, + "end": 75.52, 
+ "confidence": 0.997 + }, + { + "text": "de", + "start": 75.52, + "end": 75.76, + "confidence": 0.996 + }, + { + "text": "la", + "start": 75.76, + "end": 75.9, + "confidence": 0.99 + }, + { + "text": "CFDT.", + "start": 75.9, + "end": 76.42, + "confidence": 0.998 + } + ] + }, + { + "id": 7, + "seek": 5600, + "start": 76.5, + "end": 83.0, + "text": " Et puis il y a une bataille qui a démarré, celle entre Marine Le Pen et Bruno Gognich, la bataille de la succession de Jean-Marie Le Pen à la tête du Front National.", + "tokens": [ + 3790, + 9093, + 1930, + 288, + 257, + 2251, + 272, + 3274, + 3409, + 1956, + 257, + 22761, + 2284, + 526, + 11, + 25722, + 3962, + 20415, + 1456, + 10571, + 1030, + 23046, + 460, + 2912, + 480, + 11, + 635, + 272, + 3274, + 3409, + 368, + 635, + 36624, + 368, + 13854, + 12, + 16639, + 414, + 1456, + 10571, + 1531, + 635, + 24661, + 1581, + 17348, + 4862, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.12757631117297757, + "compression_ratio": 1.5705329153605017, + "no_speech_prob": 6.333013880066574e-05, + "confidence": 0.872, + "words": [ + { + "text": "Et", + "start": 76.5, + "end": 76.96, + "confidence": 0.912 + }, + { + "text": "puis", + "start": 76.96, + "end": 77.08, + "confidence": 0.958 + }, + { + "text": "il", + "start": 77.08, + "end": 77.14, + "confidence": 0.794 + }, + { + "text": "y", + "start": 77.14, + "end": 77.18, + "confidence": 0.994 + }, + { + "text": "a", + "start": 77.18, + "end": 77.22, + "confidence": 0.986 + }, + { + "text": "une", + "start": 77.22, + "end": 77.3, + "confidence": 0.996 + }, + { + "text": "bataille", + "start": 77.3, + "end": 77.56, + "confidence": 0.996 + }, + { + "text": "qui", + "start": 77.56, + "end": 77.68, + "confidence": 0.997 + }, + { + "text": "a", + "start": 77.68, + "end": 77.72, + "confidence": 0.985 + }, + { + "text": "démarré,", + "start": 77.72, + "end": 78.16, + "confidence": 0.987 + }, + { + "text": "celle", + "start": 78.16, + "end": 78.38, + "confidence": 0.997 + }, + { + 
"text": "entre", + "start": 78.38, + "end": 78.64, + "confidence": 0.976 + }, + { + "text": "Marine", + "start": 78.64, + "end": 78.96, + "confidence": 0.987 + }, + { + "text": "Le", + "start": 78.96, + "end": 79.12, + "confidence": 0.986 + }, + { + "text": "Pen", + "start": 79.12, + "end": 79.16, + "confidence": 0.987 + }, + { + "text": "et", + "start": 79.16, + "end": 79.32, + "confidence": 0.997 + }, + { + "text": "Bruno", + "start": 79.32, + "end": 79.54, + "confidence": 0.986 + }, + { + "text": "Gognich,", + "start": 79.54, + "end": 80.2, + "confidence": 0.24 + }, + { + "text": "la", + "start": 80.2, + "end": 80.32, + "confidence": 0.676 + }, + { + "text": "bataille", + "start": 80.32, + "end": 80.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 80.54, + "end": 80.72, + "confidence": 0.975 + }, + { + "text": "la", + "start": 80.72, + "end": 80.92, + "confidence": 0.988 + }, + { + "text": "succession", + "start": 80.92, + "end": 81.18, + "confidence": 0.983 + }, + { + "text": "de", + "start": 81.18, + "end": 81.44, + "confidence": 0.984 + }, + { + "text": "Jean-Marie", + "start": 81.44, + "end": 81.7, + "confidence": 0.95 + }, + { + "text": "Le", + "start": 81.7, + "end": 81.94, + "confidence": 0.996 + }, + { + "text": "Pen", + "start": 81.94, + "end": 81.98, + "confidence": 0.999 + }, + { + "text": "à", + "start": 81.98, + "end": 82.12, + "confidence": 0.976 + }, + { + "text": "la", + "start": 82.12, + "end": 82.28, + "confidence": 0.995 + }, + { + "text": "tête", + "start": 82.28, + "end": 82.32, + "confidence": 0.926 + }, + { + "text": "du", + "start": 82.32, + "end": 82.48, + "confidence": 0.997 + }, + { + "text": "Front", + "start": 82.48, + "end": 82.64, + "confidence": 0.775 + }, + { + "text": "National.", + "start": 82.64, + "end": 83.0, + "confidence": 0.836 + } + ] + }, + { + "id": 8, + "seek": 8300, + "start": 83.3, + "end": 88.82, + "text": " La tournée de campagne de Marine Le Pen commence aujourd'hui dans le Var, Marine Le Pen sera en 
direct dans BFM story.", + "tokens": [ + 2369, + 3512, + 77, + 3856, + 368, + 2255, + 13887, + 368, + 20415, + 1456, + 10571, + 18137, + 14023, + 6, + 10556, + 2680, + 476, + 14662, + 11, + 20415, + 1456, + 10571, + 15021, + 465, + 2047, + 2680, + 363, + 37, + 44, + 1657, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10741670781915838, + "compression_ratio": 1.3591549295774648, + "no_speech_prob": 0.00011412434105295688, + "confidence": 0.958, + "words": [ + { + "text": "La", + "start": 83.3, + "end": 83.56, + "confidence": 0.98 + }, + { + "text": "tournée", + "start": 83.56, + "end": 84.14, + "confidence": 0.995 + }, + { + "text": "de", + "start": 84.14, + "end": 84.36, + "confidence": 0.947 + }, + { + "text": "campagne", + "start": 84.36, + "end": 84.84, + "confidence": 0.984 + }, + { + "text": "de", + "start": 84.84, + "end": 84.9, + "confidence": 0.977 + }, + { + "text": "Marine", + "start": 84.9, + "end": 85.18, + "confidence": 0.997 + }, + { + "text": "Le", + "start": 85.18, + "end": 85.26, + "confidence": 0.998 + }, + { + "text": "Pen", + "start": 85.26, + "end": 85.44, + "confidence": 0.999 + }, + { + "text": "commence", + "start": 85.44, + "end": 85.78, + "confidence": 0.98 + }, + { + "text": "aujourd'hui", + "start": 85.78, + "end": 86.14, + "confidence": 0.991 + }, + { + "text": "dans", + "start": 86.14, + "end": 86.24, + "confidence": 0.984 + }, + { + "text": "le", + "start": 86.24, + "end": 86.38, + "confidence": 0.913 + }, + { + "text": "Var,", + "start": 86.38, + "end": 86.82, + "confidence": 0.523 + }, + { + "text": "Marine", + "start": 86.82, + "end": 86.96, + "confidence": 0.993 + }, + { + "text": "Le", + "start": 86.96, + "end": 87.04, + "confidence": 0.993 + }, + { + "text": "Pen", + "start": 87.04, + "end": 87.22, + "confidence": 0.999 + }, + { + "text": "sera", + "start": 87.22, + "end": 87.48, + "confidence": 0.991 + }, + { + "text": "en", + "start": 87.48, + "end": 87.68, + "confidence": 0.991 + }, + { + "text": "direct", + "start": 
87.68, + "end": 87.98, + "confidence": 0.997 + }, + { + "text": "dans", + "start": 87.98, + "end": 88.3, + "confidence": 0.935 + }, + { + "text": "BFM", + "start": 88.3, + "end": 88.72, + "confidence": 0.997 + }, + { + "text": "story.", + "start": 88.72, + "end": 88.82, + "confidence": 0.827 + } + ] + }, + { + "id": 9, + "seek": 8300, + "start": 89.0, + "end": 92.15, + "text": " Restez avec nous Marine Le Pen dans moins de 3 minutes, à tout de suite.", + "tokens": [ + 13094, + 4371, + 4163, + 4666, + 20415, + 1456, + 10571, + 2680, + 13099, + 368, + 805, + 2077, + 11, + 1531, + 3486, + 368, + 14205, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10741670781915838, + "compression_ratio": 1.3591549295774648, + "no_speech_prob": 0.00011412434105295688, + "confidence": 0.937, + "words": [ + { + "text": "Restez", + "start": 89.0, + "end": 89.64, + "confidence": 0.966 + }, + { + "text": "avec", + "start": 89.64, + "end": 89.68, + "confidence": 0.997 + }, + { + "text": "nous", + "start": 89.68, + "end": 89.94, + "confidence": 0.968 + }, + { + "text": "Marine", + "start": 89.94, + "end": 90.18, + "confidence": 0.906 + }, + { + "text": "Le", + "start": 90.18, + "end": 90.4, + "confidence": 0.998 + }, + { + "text": "Pen", + "start": 90.4, + "end": 90.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 90.68, + "end": 90.88, + "confidence": 0.836 + }, + { + "text": "moins", + "start": 90.88, + "end": 91.18, + "confidence": 0.993 + }, + { + "text": "de", + "start": 91.18, + "end": 91.28, + "confidence": 0.991 + }, + { + "text": "3", + "start": 91.28, + "end": 91.46, + "confidence": 0.609 + }, + { + "text": "minutes,", + "start": 91.46, + "end": 91.86, + "confidence": 0.986 + }, + { + "text": "à", + "start": 91.86, + "end": 91.9, + "confidence": 0.962 + }, + { + "text": "tout", + "start": 91.9, + "end": 92.04, + "confidence": 0.918 + }, + { + "text": "de", + "start": 92.04, + "end": 92.1, + "confidence": 0.999 + }, + { + "text": "suite.", + "start": 92.1, + 
"end": 92.15, + "confidence": 0.999 + } + ] + }, + { + "id": 10, + "seek": 9200, + "start": 92.15, + "end": 93.74, + "text": " Musique", + "tokens": [ + 3569, + 1925 + ], + "temperature": 0.0, + "avg_logprob": -0.15070751414579503, + "compression_ratio": 1.4545454545454546, + "no_speech_prob": 8.21087378426455e-05, + "confidence": 0.328, + "words": [ + { + "text": "Musique", + "start": 92.15, + "end": 93.74, + "confidence": 0.328 + } + ] + }, + { + "id": 11, + "seek": 9200, + "start": 106.7, + "end": 113.5, + "text": " L'actualité c'est aussi aujourd'hui un dernier adieu, dernier adieu à Laurent Fignon, c'était au cimetière du Père Lachaise à Paris.", + "tokens": [ + 441, + 6, + 578, + 901, + 5066, + 269, + 6, + 377, + 6212, + 14023, + 6, + 10556, + 517, + 29332, + 614, + 19347, + 11, + 29332, + 614, + 19347, + 1531, + 49357, + 479, + 41846, + 11, + 269, + 6, + 9743, + 1609, + 269, + 26123, + 10195, + 1581, + 430, + 4212, + 441, + 27442, + 908, + 1531, + 8380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.15070751414579503, + "compression_ratio": 1.4545454545454546, + "no_speech_prob": 8.21087378426455e-05, + "confidence": 0.898, + "words": [ + { + "text": "L'actualité", + "start": 106.7, + "end": 107.44, + "confidence": 0.868 + }, + { + "text": "c'est", + "start": 107.44, + "end": 107.66, + "confidence": 0.84 + }, + { + "text": "aussi", + "start": 107.66, + "end": 108.02, + "confidence": 0.804 + }, + { + "text": "aujourd'hui", + "start": 108.02, + "end": 108.38, + "confidence": 0.928 + }, + { + "text": "un", + "start": 108.38, + "end": 108.48, + "confidence": 0.887 + }, + { + "text": "dernier", + "start": 108.48, + "end": 108.8, + "confidence": 0.991 + }, + { + "text": "adieu,", + "start": 108.8, + "end": 109.2, + "confidence": 0.982 + }, + { + "text": "dernier", + "start": 109.2, + "end": 109.38, + "confidence": 0.501 + }, + { + "text": "adieu", + "start": 109.38, + "end": 109.96, + "confidence": 0.997 + }, + { + "text": "à", + "start": 109.96, + "end": 
110.12, + "confidence": 0.978 + }, + { + "text": "Laurent", + "start": 110.12, + "end": 110.42, + "confidence": 0.94 + }, + { + "text": "Fignon,", + "start": 110.42, + "end": 111.12, + "confidence": 0.816 + }, + { + "text": "c'était", + "start": 111.12, + "end": 111.24, + "confidence": 0.972 + }, + { + "text": "au", + "start": 111.24, + "end": 111.46, + "confidence": 0.99 + }, + { + "text": "cimetière", + "start": 111.46, + "end": 111.94, + "confidence": 0.984 + }, + { + "text": "du", + "start": 111.94, + "end": 112.14, + "confidence": 0.963 + }, + { + "text": "Père", + "start": 112.14, + "end": 112.36, + "confidence": 0.892 + }, + { + "text": "Lachaise", + "start": 112.36, + "end": 112.88, + "confidence": 0.864 + }, + { + "text": "à", + "start": 112.88, + "end": 113.12, + "confidence": 0.795 + }, + { + "text": "Paris.", + "start": 113.12, + "end": 113.5, + "confidence": 0.998 + } + ] + }, + { + "id": 12, + "seek": 11400, + "start": 114.02, + "end": 120.46, + "text": " L'ancien double vainqueur du Tour de France, vaincu par le cancer à 50 ans, a été incinéré en petit comité aujourd'hui.", + "tokens": [ + 50364, + 441, + 6, + 38840, + 268, + 3834, + 22240, + 1077, + 374, + 1581, + 13077, + 368, + 6190, + 11, + 22240, + 12032, + 971, + 476, + 5592, + 1531, + 2625, + 1567, + 11, + 257, + 8862, + 834, + 259, + 29071, + 465, + 9686, + 395, + 5066, + 14023, + 6, + 10556, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.10655183541147333, + "compression_ratio": 1.1160714285714286, + "no_speech_prob": 0.0002510416379664093, + "confidence": 0.933, + "words": [ + { + "text": "L'ancien", + "start": 114.02, + "end": 114.36, + "confidence": 0.735 + }, + { + "text": "double", + "start": 114.36, + "end": 114.64, + "confidence": 0.991 + }, + { + "text": "vainqueur", + "start": 114.64, + "end": 115.08, + "confidence": 0.996 + }, + { + "text": "du", + "start": 115.08, + "end": 115.18, + "confidence": 0.971 + }, + { + "text": "Tour", + "start": 115.18, + "end": 115.36, + 
"confidence": 0.828 + }, + { + "text": "de", + "start": 115.36, + "end": 115.5, + "confidence": 0.992 + }, + { + "text": "France,", + "start": 115.5, + "end": 116.32, + "confidence": 0.999 + }, + { + "text": "vaincu", + "start": 116.32, + "end": 116.48, + "confidence": 0.989 + }, + { + "text": "par", + "start": 116.48, + "end": 116.6, + "confidence": 0.997 + }, + { + "text": "le", + "start": 116.6, + "end": 116.74, + "confidence": 0.994 + }, + { + "text": "cancer", + "start": 116.74, + "end": 117.06, + "confidence": 0.966 + }, + { + "text": "à", + "start": 117.06, + "end": 117.24, + "confidence": 0.964 + }, + { + "text": "50", + "start": 117.24, + "end": 117.54, + "confidence": 0.977 + }, + { + "text": "ans,", + "start": 117.54, + "end": 118.34, + "confidence": 0.989 + }, + { + "text": "a", + "start": 118.34, + "end": 118.38, + "confidence": 0.989 + }, + { + "text": "été", + "start": 118.38, + "end": 118.56, + "confidence": 0.996 + }, + { + "text": "incinéré", + "start": 118.56, + "end": 119.24, + "confidence": 0.965 + }, + { + "text": "en", + "start": 119.24, + "end": 119.48, + "confidence": 0.913 + }, + { + "text": "petit", + "start": 119.48, + "end": 119.74, + "confidence": 0.883 + }, + { + "text": "comité", + "start": 119.74, + "end": 120.18, + "confidence": 0.978 + }, + { + "text": "aujourd'hui.", + "start": 120.18, + "end": 120.46, + "confidence": 0.929 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr.cpu/smartphone.mp3.words.json b/tests/expected/medium_fr.cpu/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..9faa961d5cf4a38b6ac04b76010aeaddb28cee89 --- /dev/null +++ b/tests/expected/medium_fr.cpu/smartphone.mp3.words.json @@ -0,0 +1,4802 @@ +{ + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça. 
Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions, mais la manière dont elles interagissent entre elles. Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces. L'écran tactile a été beaucoup très souvent mentionné. Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes. Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible. Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but. Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité. Mais ça, ça soulève une autre interrogation. Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit? Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone? Il n'y a pas d'équivalent en fait. Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant. Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet, d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet. Donc, à objet inédit, rapport inédit. Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment. 
Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre. On peut adorer sa bagnole, en avoir besoin pour plein de choses. Et bien, le soir, quand on va se coucher, on la laisse. On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes. On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui, continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate. Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi. Donc, rapport inédit. D'accord. Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais? Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux? Les économistes parlent de dépendance du sentier. 
C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 3.62, + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça.", + "tokens": [ + 383, + 6, + 377, + 20090, + 1078, + 1769, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 408, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.935, + "words": [ + { + "text": "C'est", + "start": 0.38, + "end": 0.58, + "confidence": 0.961 + }, + { + "text": "évident", + "start": 0.58, + "end": 0.88, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 0.88, + "end": 1.02, + "confidence": 0.663 + }, + { + "text": "que", + "start": 1.02, + "end": 1.08, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.994 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.78, + "confidence": 0.91 + }, + { + "text": "mais", + "start": 1.78, + "end": 1.9, + "confidence": 0.979 + }, + { + "text": "je", + "start": 1.9, + "end": 2.24, + "confidence": 0.982 + }, + { + "text": "ne", + "start": 2.24, + "end": 2.34, + "confidence": 0.835 + }, + { + "text": "me", + "start": 2.34, + "end": 2.38, + "confidence": 0.82 + }, + { + "text": "l'étais", + "start": 2.38, + "end": 2.58, + "confidence": 0.971 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.84, + "confidence": 0.989 + }, + { + "text": "formulé", + "start": 2.84, + "end": 3.26, + "confidence": 0.909 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.42, + "confidence": 0.993 + }, + { + "text": "ça.", + "start": 3.42, + "end": 3.62, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.08, + "end": 7.92, + "text": " 
Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions,", + "tokens": [ + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 287, + 6, + 8476, + 449, + 2776, + 730, + 17290, + 3916, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.93, + "words": [ + { + "text": "Ce", + "start": 4.08, + "end": 4.26, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 4.26, + "end": 4.34, + "confidence": 0.958 + }, + { + "text": "fait", + "start": 4.34, + "end": 4.48, + "confidence": 0.565 + }, + { + "text": "la", + "start": 4.48, + "end": 4.66, + "confidence": 0.971 + }, + { + "text": "force", + "start": 4.66, + "end": 5.0, + "confidence": 0.999 + }, + { + "text": "du", + "start": 5.0, + "end": 5.2, + "confidence": 0.996 + }, + { + "text": "smartphone,", + "start": 5.2, + "end": 5.88, + "confidence": 0.911 + }, + { + "text": "c'est", + "start": 5.88, + "end": 6.12, + "confidence": 0.88 + }, + { + "text": "pas", + "start": 6.12, + "end": 6.26, + "confidence": 0.992 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.52, + "confidence": 0.999 + }, + { + "text": "l'accumulation", + "start": 6.52, + "end": 7.38, + "confidence": 0.958 + }, + { + "text": "des", + "start": 7.38, + "end": 7.56, + "confidence": 0.983 + }, + { + "text": "fonctions,", + "start": 7.56, + "end": 7.92, + "confidence": 0.987 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.32, + "end": 10.88, + "text": " mais la manière dont elles interagissent entre elles.", + "tokens": [ + 2420, + 635, + 22267, + 9400, + 23576, + 728, + 559, + 25450, + 3962, + 23576, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.977, + "words": [ + { + "text": "mais", + "start": 8.32, + "end": 8.44, + 
"confidence": 0.992 + }, + { + "text": "la", + "start": 8.44, + "end": 8.6, + "confidence": 0.995 + }, + { + "text": "manière", + "start": 8.6, + "end": 8.9, + "confidence": 0.999 + }, + { + "text": "dont", + "start": 8.9, + "end": 9.1, + "confidence": 0.978 + }, + { + "text": "elles", + "start": 9.1, + "end": 9.48, + "confidence": 0.967 + }, + { + "text": "interagissent", + "start": 9.48, + "end": 10.32, + "confidence": 0.964 + }, + { + "text": "entre", + "start": 10.32, + "end": 10.58, + "confidence": 0.956 + }, + { + "text": "elles.", + "start": 10.58, + "end": 10.88, + "confidence": 0.99 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.96, + "end": 13.0, + "text": " Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant.", + "tokens": [ + 8257, + 1956, + 6176, + 274, + 6, + 19400, + 1022, + 635, + 5052, + 11, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.906, + "words": [ + { + "text": "Ce", + "start": 10.96, + "end": 11.16, + "confidence": 0.607 + }, + { + "text": "qui", + "start": 11.16, + "end": 11.22, + "confidence": 0.765 + }, + { + "text": "dit", + "start": 11.22, + "end": 11.4, + "confidence": 0.983 + }, + { + "text": "d'ailleurs", + "start": 11.4, + "end": 11.56, + "confidence": 0.985 + }, + { + "text": "sur", + "start": 11.56, + "end": 11.72, + "confidence": 0.477 + }, + { + "text": "la", + "start": 11.72, + "end": 11.78, + "confidence": 0.984 + }, + { + "text": "photo,", + "start": 11.78, + "end": 12.12, + "confidence": 0.994 + }, + { + "text": "c'est", + "start": 12.12, + "end": 12.2, + "confidence": 0.997 + }, + { + "text": "hyper", + "start": 12.2, + "end": 12.42, + "confidence": 0.993 + }, + { + "text": "convaincant.", + "start": 12.42, + "end": 13.0, + "confidence": 0.982 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.34, + "end": 16.02, + "text": " 
Alors évidemment, il faudrait ajouter les interfaces.", + "tokens": [ + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.913, + "words": [ + { + "text": "Alors", + "start": 13.34, + "end": 13.62, + "confidence": 0.585 + }, + { + "text": "évidemment,", + "start": 13.62, + "end": 14.34, + "confidence": 0.832 + }, + { + "text": "il", + "start": 14.34, + "end": 14.38, + "confidence": 0.952 + }, + { + "text": "faudrait", + "start": 14.38, + "end": 14.74, + "confidence": 0.996 + }, + { + "text": "ajouter", + "start": 14.74, + "end": 15.16, + "confidence": 0.992 + }, + { + "text": "les", + "start": 15.16, + "end": 15.52, + "confidence": 0.985 + }, + { + "text": "interfaces.", + "start": 15.52, + "end": 16.02, + "confidence": 0.984 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.22, + "end": 19.36, + "text": " L'écran tactile a été beaucoup très souvent mentionné.", + "tokens": [ + 441, + 6, + 9062, + 4257, + 47319, + 257, + 8862, + 8796, + 5732, + 20847, + 2152, + 15055, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.923, + "words": [ + { + "text": "L'écran", + "start": 16.22, + "end": 16.7, + "confidence": 0.996 + }, + { + "text": "tactile", + "start": 16.7, + "end": 17.06, + "confidence": 0.986 + }, + { + "text": "a", + "start": 17.06, + "end": 17.26, + "confidence": 0.98 + }, + { + "text": "été", + "start": 17.26, + "end": 17.88, + "confidence": 0.975 + }, + { + "text": "beaucoup", + "start": 17.88, + "end": 18.28, + "confidence": 0.976 + }, + { + "text": "très", + "start": 18.28, + "end": 18.62, + "confidence": 0.447 + }, + { + "text": "souvent", + "start": 18.62, + "end": 18.9, + "confidence": 0.996 + }, + { + "text": "mentionné.", + 
"start": 18.9, + "end": 19.36, + "confidence": 0.978 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 19.84, + "end": 25.26, + "text": " Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes.", + "tokens": [ + 6313, + 4428, + 11, + 1930, + 8487, + 1264, + 421, + 6, + 388, + 1740, + 642, + 6212, + 368, + 945, + 1567, + 17338, + 1512, + 358, + 1625, + 1512, + 4792, + 13923, + 2156, + 4666, + 6592, + 724, + 5714, + 1531, + 596, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.92, + "words": [ + { + "text": "Mais", + "start": 19.84, + "end": 20.22, + "confidence": 0.944 + }, + { + "text": "bon,", + "start": 20.22, + "end": 20.52, + "confidence": 0.667 + }, + { + "text": "il", + "start": 20.52, + "end": 20.6, + "confidence": 0.99 + }, + { + "text": "faut", + "start": 20.6, + "end": 20.7, + "confidence": 0.99 + }, + { + "text": "dire", + "start": 20.7, + "end": 20.84, + "confidence": 0.995 + }, + { + "text": "qu'il", + "start": 20.84, + "end": 20.96, + "confidence": 0.88 + }, + { + "text": "profite", + "start": 20.96, + "end": 21.26, + "confidence": 0.995 + }, + { + "text": "aussi", + "start": 21.26, + "end": 21.68, + "confidence": 0.972 + }, + { + "text": "de", + "start": 21.68, + "end": 21.9, + "confidence": 0.97 + }, + { + "text": "20", + "start": 21.9, + "end": 22.1, + "confidence": 0.812 + }, + { + "text": "ans", + "start": 22.1, + "end": 22.32, + "confidence": 0.997 + }, + { + "text": "pendant", + "start": 22.32, + "end": 22.48, + "confidence": 0.72 + }, + { + "text": "lesquels", + "start": 22.48, + "end": 22.92, + "confidence": 0.98 + }, + { + "text": "les", + "start": 22.92, + "end": 23.04, + "confidence": 0.709 + }, + { + "text": "ordinateurs", + "start": 23.04, + "end": 23.54, + "confidence": 0.965 + }, + { + "text": 
"nous", + "start": 23.54, + "end": 23.72, + "confidence": 0.602 + }, + { + "text": "ont", + "start": 23.72, + "end": 23.82, + "confidence": 0.974 + }, + { + "text": "appris", + "start": 23.82, + "end": 24.1, + "confidence": 0.991 + }, + { + "text": "à", + "start": 24.1, + "end": 24.24, + "confidence": 0.829 + }, + { + "text": "cliquer", + "start": 24.24, + "end": 24.5, + "confidence": 0.989 + }, + { + "text": "sur", + "start": 24.5, + "end": 24.66, + "confidence": 0.984 + }, + { + "text": "des", + "start": 24.66, + "end": 24.94, + "confidence": 0.971 + }, + { + "text": "icônes.", + "start": 24.94, + "end": 25.26, + "confidence": 0.992 + } + ] + }, + { + "id": 7, + "seek": 2534, + "start": 25.42, + "end": 30.64, + "text": " Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible.", + "tokens": [ + 318, + 9507, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1769, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.969, + "words": [ + { + "text": "Sauf", + "start": 25.42, + "end": 25.76, + "confidence": 0.99 + }, + { + "text": "que", + "start": 25.76, + "end": 26.26, + "confidence": 0.996 + }, + { + "text": "le", + "start": 26.26, + "end": 26.66, + "confidence": 0.631 + }, + { + "text": "smartphone", + "start": 26.66, + "end": 27.06, + "confidence": 0.996 + }, + { + "text": "ajoute", + "start": 27.06, + "end": 27.44, + "confidence": 0.991 + }, + { + "text": "le", + "start": 27.44, + "end": 27.62, + "confidence": 0.992 + }, + { + "text": "toucher,", + "start": 27.62, + "end": 28.18, + "confidence": 0.988 + }, + { + "text": "ce", + "start": 28.18, + "end": 28.22, + "confidence": 0.99 + }, + { + "text": "qui", + "start": 28.22, + "end": 28.28, + "confidence": 1.0 + }, + { + "text": "rend", + "start": 28.28, + "end": 28.48, 
+ "confidence": 0.994 + }, + { + "text": "le", + "start": 28.48, + "end": 28.68, + "confidence": 0.993 + }, + { + "text": "contact", + "start": 28.68, + "end": 29.1, + "confidence": 0.999 + }, + { + "text": "plus", + "start": 29.1, + "end": 29.46, + "confidence": 0.985 + }, + { + "text": "direct,", + "start": 29.46, + "end": 30.22, + "confidence": 0.995 + }, + { + "text": "plus", + "start": 30.22, + "end": 30.26, + "confidence": 0.994 + }, + { + "text": "sensible.", + "start": 30.26, + "end": 30.64, + "confidence": 0.997 + } + ] + }, + { + "id": 8, + "seek": 2534, + "start": 31.04, + "end": 37.82, + "text": " Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but.", + "tokens": [ + 3790, + 9093, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 10095, + 602, + 84, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 1609, + 457, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.892, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.22, + "confidence": 0.97 + }, + { + "text": "puis", + "start": 31.22, + "end": 31.36, + "confidence": 0.971 + }, + { + "text": "évidemment,", + "start": 31.36, + "end": 31.7, + "confidence": 0.875 + }, + { + "text": "il", + "start": 31.7, + "end": 31.74, + "confidence": 0.993 + }, + { + "text": "faudrait", + "start": 31.74, + "end": 31.94, + "confidence": 0.995 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.12, + "confidence": 0.84 + }, + { + "text": "aussi", + "start": 32.12, + "end": 32.34, + "confidence": 0.977 + }, + { + "text": "des", + "start": 32.34, + "end": 32.48, + "confidence": 0.994 + }, + { + "text": "applications", + "start": 32.48, + "end": 32.9, + "confidence": 0.993 + }, + { + 
"text": "qui", + "start": 32.9, + "end": 33.18, + "confidence": 0.481 + }, + { + "text": "permettent", + "start": 33.18, + "end": 33.74, + "confidence": 0.992 + }, + { + "text": "de", + "start": 33.74, + "end": 33.96, + "confidence": 0.885 + }, + { + "text": "contourner", + "start": 33.96, + "end": 34.42, + "confidence": 0.958 + }, + { + "text": "le", + "start": 34.42, + "end": 34.52, + "confidence": 0.775 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.8, + "confidence": 0.984 + }, + { + "text": "touffu", + "start": 34.8, + "end": 35.32, + "confidence": 0.741 + }, + { + "text": "de", + "start": 35.32, + "end": 35.72, + "confidence": 0.882 + }, + { + "text": "la", + "start": 35.72, + "end": 35.78, + "confidence": 0.992 + }, + { + "text": "navigation", + "start": 35.78, + "end": 36.24, + "confidence": 0.994 + }, + { + "text": "web", + "start": 36.24, + "end": 36.6, + "confidence": 0.854 + }, + { + "text": "pour", + "start": 36.6, + "end": 36.78, + "confidence": 0.585 + }, + { + "text": "aller", + "start": 36.78, + "end": 36.98, + "confidence": 0.987 + }, + { + "text": "directement", + "start": 36.98, + "end": 37.52, + "confidence": 0.997 + }, + { + "text": "au", + "start": 37.52, + "end": 37.68, + "confidence": 0.967 + }, + { + "text": "but.", + "start": 37.68, + "end": 37.82, + "confidence": 0.995 + } + ] + }, + { + "id": 9, + "seek": 2534, + "start": 37.82, + "end": 46.54, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 8603, + 14964, + 9400, + 38268, + 6176, + 421, + 6, + 388, + 871, + 6070, + 271, + 443, + 5199, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 
1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.974, + "words": [ + { + "text": "Bref,", + "start": 37.82, + "end": 38.76, + "confidence": 0.987 + }, + { + "text": "tout", + "start": 38.76, + "end": 38.98, + "confidence": 0.711 + }, + { + "text": "ça,", + "start": 38.98, + "end": 39.42, + "confidence": 0.995 + }, + { + "text": "ce", + "start": 39.42, + "end": 39.7, + "confidence": 0.993 + }, + { + "text": "sont", + "start": 39.7, + "end": 39.88, + "confidence": 0.999 + }, + { + "text": "les", + "start": 39.88, + "end": 40.16, + "confidence": 0.991 + }, + { + "text": "conditions", + "start": 40.16, + "end": 40.68, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 40.68, + "end": 40.96, + "confidence": 0.997 + }, + { + "text": "permettent", + "start": 40.96, + "end": 41.46, + "confidence": 0.997 + }, + { + "text": "de", + "start": 41.46, + "end": 41.6, + "confidence": 0.998 + }, + { + "text": "créer", + "start": 41.6, + "end": 42.06, + "confidence": 0.998 + }, + { + "text": "cet", + "start": 42.06, + "end": 42.38, + "confidence": 0.998 + }, + { + "text": "objet", + "start": 42.38, + "end": 42.6, + "confidence": 0.994 + }, + { + "text": "dont", + "start": 42.6, + "end": 42.8, + "confidence": 0.779 + }, + { + "text": "Nicolas", + "start": 42.8, + "end": 43.26, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 43.26, + "end": 43.5, + "confidence": 0.986 + }, + { + "text": "qu'il", + "start": 43.5, + "end": 43.7, + "confidence": 0.983 + }, + { + "text": "est", + "start": 43.7, + "end": 43.88, + "confidence": 0.991 + }, + { + "text": "vraisemblablement", + "start": 43.88, + "end": 44.98, + "confidence": 0.991 + }, + { + "text": "inédit", + "start": 44.98, + "end": 45.38, + "confidence": 0.98 + }, + { + "text": "dans", + "start": 45.38, + "end": 45.7, + "confidence": 0.969 + }, + { + "text": "l'histoire", + "start": 45.7, + "end": 45.98, + "confidence": 0.957 + }, + { + "text": "de", + "start": 45.98, + "end": 46.18, + 
"confidence": 0.999 + }, + { + "text": "l'humanité.", + "start": 46.18, + "end": 46.54, + "confidence": 0.992 + } + ] + }, + { + "id": 10, + "seek": 2534, + "start": 46.54, + "end": 48.82, + "text": " Mais ça, ça soulève une autre interrogation.", + "tokens": [ + 6313, + 2788, + 11, + 2788, + 5133, + 31397, + 2251, + 15081, + 24871, + 399, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.969, + "words": [ + { + "text": "Mais", + "start": 46.54, + "end": 47.24, + "confidence": 0.846 + }, + { + "text": "ça,", + "start": 47.24, + "end": 47.72, + "confidence": 0.935 + }, + { + "text": "ça", + "start": 47.72, + "end": 47.76, + "confidence": 0.977 + }, + { + "text": "soulève", + "start": 47.76, + "end": 47.84, + "confidence": 0.993 + }, + { + "text": "une", + "start": 47.84, + "end": 48.02, + "confidence": 0.998 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.26, + "confidence": 0.999 + }, + { + "text": "interrogation.", + "start": 48.26, + "end": 48.82, + "confidence": 0.997 + } + ] + }, + { + "id": 11, + "seek": 4884, + "start": 49.22, + "end": 55.46, + "text": " Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit?", + "tokens": [ + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 8603, + 14964, + 12703, + 294, + 7811, + 270, + 13716, + 270, + 631, + 10349, + 18018, + 1531, + 8783, + 871, + 6212, + 517, + 18018, + 294, + 7811, + 270, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.988, + "words": [ + { + "text": "Est-ce", + "start": 49.22, + "end": 49.62, + "confidence": 0.982 + }, + { + "text": "que", + "start": 49.62, + "end": 49.72, + "confidence": 0.991 + }, + { + "text": "le", + "start": 49.72, + "end": 49.82, + "confidence": 0.993 + }, + { + "text": 
"fait", + "start": 49.82, + "end": 49.98, + "confidence": 0.999 + }, + { + "text": "que", + "start": 49.98, + "end": 50.14, + "confidence": 0.991 + }, + { + "text": "cet", + "start": 50.14, + "end": 50.32, + "confidence": 0.991 + }, + { + "text": "objet", + "start": 50.32, + "end": 50.66, + "confidence": 0.997 + }, + { + "text": "soit", + "start": 50.66, + "end": 51.12, + "confidence": 0.995 + }, + { + "text": "inédit", + "start": 51.12, + "end": 51.8, + "confidence": 0.996 + }, + { + "text": "induit", + "start": 51.8, + "end": 52.32, + "confidence": 0.977 + }, + { + "text": "que", + "start": 52.32, + "end": 52.42, + "confidence": 0.983 + }, + { + "text": "notre", + "start": 52.42, + "end": 52.72, + "confidence": 0.996 + }, + { + "text": "rapport", + "start": 52.72, + "end": 53.28, + "confidence": 0.997 + }, + { + "text": "à", + "start": 53.28, + "end": 53.44, + "confidence": 0.978 + }, + { + "text": "lui", + "start": 53.44, + "end": 53.66, + "confidence": 0.999 + }, + { + "text": "est", + "start": 53.66, + "end": 54.02, + "confidence": 0.916 + }, + { + "text": "aussi", + "start": 54.02, + "end": 54.54, + "confidence": 0.995 + }, + { + "text": "un", + "start": 54.54, + "end": 54.7, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 54.7, + "end": 55.0, + "confidence": 0.996 + }, + { + "text": "inédit?", + "start": 55.0, + "end": 55.46, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 4884, + "start": 55.46, + "end": 63.12, + "text": " Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone?", + "tokens": [ + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 13307, + 871, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 1111, + 25349, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506 + ], + "temperature": 0.0, + 
"avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.965, + "words": [ + { + "text": "Je", + "start": 55.46, + "end": 55.9, + "confidence": 0.88 + }, + { + "text": "veux", + "start": 55.9, + "end": 56.0, + "confidence": 0.988 + }, + { + "text": "dire,", + "start": 56.0, + "end": 56.24, + "confidence": 0.997 + }, + { + "text": "est-ce", + "start": 56.24, + "end": 56.36, + "confidence": 0.99 + }, + { + "text": "que", + "start": 56.36, + "end": 56.42, + "confidence": 0.99 + }, + { + "text": "le", + "start": 56.42, + "end": 56.58, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 56.58, + "end": 56.88, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 56.88, + "end": 57.04, + "confidence": 0.986 + }, + { + "text": "a", + "start": 57.04, + "end": 57.18, + "confidence": 0.989 + }, + { + "text": "au", + "start": 57.18, + "end": 57.28, + "confidence": 0.968 + }, + { + "text": "smartphone", + "start": 57.28, + "end": 57.6, + "confidence": 0.994 + }, + { + "text": "est", + "start": 57.6, + "end": 57.92, + "confidence": 0.95 + }, + { + "text": "comparable", + "start": 57.92, + "end": 58.24, + "confidence": 0.997 + }, + { + "text": "à", + "start": 58.24, + "end": 58.48, + "confidence": 0.949 + }, + { + "text": "celui", + "start": 58.48, + "end": 58.66, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 58.66, + "end": 58.9, + "confidence": 0.989 + }, + { + "text": "entretenait", + "start": 58.9, + "end": 59.32, + "confidence": 0.929 + }, + { + "text": "à", + "start": 59.32, + "end": 59.46, + "confidence": 0.958 + }, + { + "text": "d'autres", + "start": 59.46, + "end": 59.7, + "confidence": 0.997 + }, + { + "text": "objets", + "start": 59.7, + "end": 59.96, + "confidence": 0.991 + }, + { + "text": "techniques", + "start": 59.96, + "end": 60.46, + "confidence": 0.983 + }, + { + "text": "comme", + "start": 60.46, + "end": 60.88, + "confidence": 0.587 + }, + { 
+ "text": "la", + "start": 60.88, + "end": 61.5, + "confidence": 0.987 + }, + { + "text": "voiture", + "start": 61.5, + "end": 62.06, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 62.06, + "end": 62.36, + "confidence": 0.945 + }, + { + "text": "le", + "start": 62.36, + "end": 62.68, + "confidence": 0.998 + }, + { + "text": "téléphone?", + "start": 62.68, + "end": 63.12, + "confidence": 0.999 + } + ] + }, + { + "id": 13, + "seek": 4884, + "start": 63.36, + "end": 66.66, + "text": " Il n'y a pas d'équivalent en fait.", + "tokens": [ + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 465, + 3887, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.936, + "words": [ + { + "text": "Il", + "start": 63.36, + "end": 65.42, + "confidence": 0.779 + }, + { + "text": "n'y", + "start": 65.42, + "end": 65.48, + "confidence": 0.978 + }, + { + "text": "a", + "start": 65.48, + "end": 65.54, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 65.54, + "end": 65.66, + "confidence": 0.999 + }, + { + "text": "d'équivalent", + "start": 65.66, + "end": 66.22, + "confidence": 0.995 + }, + { + "text": "en", + "start": 66.22, + "end": 66.42, + "confidence": 0.601 + }, + { + "text": "fait.", + "start": 66.42, + "end": 66.66, + "confidence": 0.996 + } + ] + }, + { + "id": 14, + "seek": 4884, + "start": 66.88, + "end": 71.52, + "text": " Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant.", + "tokens": [ + 3790, + 5926, + 5550, + 7089, + 30236, + 368, + 11456, + 1375, + 526, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 11, + 269, + 6, + 377, + 7184, + 259, + 394, + 1030, + 7245, + 351, + 5798, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.953, + 
"words": [ + { + "text": "Et", + "start": 66.88, + "end": 66.98, + "confidence": 0.599 + }, + { + "text": "donc", + "start": 66.98, + "end": 67.08, + "confidence": 0.901 + }, + { + "text": "cette", + "start": 67.08, + "end": 67.28, + "confidence": 0.71 + }, + { + "text": "espèce", + "start": 67.28, + "end": 67.54, + "confidence": 0.995 + }, + { + "text": "de", + "start": 67.54, + "end": 67.68, + "confidence": 0.999 + }, + { + "text": "nouveauté", + "start": 67.68, + "end": 68.48, + "confidence": 0.979 + }, + { + "text": "dans", + "start": 68.48, + "end": 68.66, + "confidence": 0.98 + }, + { + "text": "la", + "start": 68.66, + "end": 68.94, + "confidence": 0.995 + }, + { + "text": "relation", + "start": 68.94, + "end": 69.22, + "confidence": 0.998 + }, + { + "text": "à", + "start": 69.22, + "end": 69.38, + "confidence": 0.997 + }, + { + "text": "l'objet,", + "start": 69.38, + "end": 70.24, + "confidence": 0.997 + }, + { + "text": "c'est", + "start": 70.24, + "end": 70.38, + "confidence": 0.98 + }, + { + "text": "fascinant", + "start": 70.38, + "end": 70.64, + "confidence": 0.978 + }, + { + "text": "et", + "start": 70.64, + "end": 70.76, + "confidence": 0.964 + }, + { + "text": "terrifiant.", + "start": 70.76, + "end": 71.52, + "confidence": 0.977 + } + ] + }, + { + "id": 15, + "seek": 4884, + "start": 71.62, + "end": 76.48, + "text": " Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet,", + "tokens": [ + 20429, + 421, + 6, + 266, + 257, + 287, + 6, + 36107, + 11, + 5173, + 476, + 37313, + 1512, + 33643, + 25929, + 1030, + 1512, + 3328, + 11, + 274, + 6, + 9498, + 45768, + 1719, + 368, + 8603, + 14964, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.813, + "words": [ + { + "text": "Parce", + "start": 71.62, + "end": 71.86, + "confidence": 0.512 + }, + { + "text": "qu'on", + "start": 
71.86, + "end": 72.12, + "confidence": 0.937 + }, + { + "text": "a", + "start": 72.12, + "end": 72.44, + "confidence": 0.982 + }, + { + "text": "l'impression,", + "start": 72.44, + "end": 73.56, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 73.56, + "end": 73.84, + "confidence": 0.964 + }, + { + "text": "le", + "start": 73.84, + "end": 74.0, + "confidence": 0.984 + }, + { + "text": "disent", + "start": 74.0, + "end": 74.2, + "confidence": 0.998 + }, + { + "text": "les", + "start": 74.2, + "end": 74.4, + "confidence": 0.994 + }, + { + "text": "utilisateurs", + "start": 74.4, + "end": 74.84, + "confidence": 0.997 + }, + { + "text": "et", + "start": 74.84, + "end": 74.96, + "confidence": 0.329 + }, + { + "text": "les", + "start": 74.96, + "end": 75.0, + "confidence": 0.773 + }, + { + "text": "services,", + "start": 75.0, + "end": 75.22, + "confidence": 0.214 + }, + { + "text": "d'être", + "start": 75.22, + "end": 75.42, + "confidence": 0.766 + }, + { + "text": "dépendants", + "start": 75.42, + "end": 75.96, + "confidence": 0.789 + }, + { + "text": "de", + "start": 75.96, + "end": 76.08, + "confidence": 0.986 + }, + { + "text": "cet", + "start": 76.08, + "end": 76.26, + "confidence": 0.996 + }, + { + "text": "objet,", + "start": 76.26, + "end": 76.48, + "confidence": 0.996 + } + ] + }, + { + "id": 16, + "seek": 7684, + "start": 76.86, + "end": 83.26, + "text": " d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet.", + "tokens": [ + 274, + 6, + 471, + 43612, + 465, + 3887, + 2251, + 7089, + 30236, + 368, + 9721, + 11, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 368, + 287, + 6, + 335, + 781, + 374, + 1030, + 1956, + 669, + 18832, + 6212, + 1531, + 730, + 1254, + 279, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + 
"confidence": 0.917, + "words": [ + { + "text": "d'induire", + "start": 76.86, + "end": 77.08, + "confidence": 0.841 + }, + { + "text": "en", + "start": 77.08, + "end": 77.24, + "confidence": 0.618 + }, + { + "text": "fait", + "start": 77.24, + "end": 77.34, + "confidence": 0.994 + }, + { + "text": "une", + "start": 77.34, + "end": 77.52, + "confidence": 0.983 + }, + { + "text": "espèce", + "start": 77.52, + "end": 77.88, + "confidence": 0.996 + }, + { + "text": "de", + "start": 77.88, + "end": 78.48, + "confidence": 0.997 + }, + { + "text": "relation,", + "start": 78.48, + "end": 78.6, + "confidence": 0.596 + }, + { + "text": "de", + "start": 78.6, + "end": 78.94, + "confidence": 0.987 + }, + { + "text": "médiation", + "start": 78.94, + "end": 79.52, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 79.52, + "end": 79.74, + "confidence": 0.967 + }, + { + "text": "le", + "start": 79.74, + "end": 79.92, + "confidence": 0.998 + }, + { + "text": "monde", + "start": 79.92, + "end": 80.64, + "confidence": 0.992 + }, + { + "text": "qui", + "start": 80.64, + "end": 81.1, + "confidence": 0.696 + }, + { + "text": "rend", + "start": 81.1, + "end": 81.64, + "confidence": 0.91 + }, + { + "text": "de", + "start": 81.64, + "end": 81.78, + "confidence": 0.712 + }, + { + "text": "l'ampleur", + "start": 81.78, + "end": 82.02, + "confidence": 0.987 + }, + { + "text": "et", + "start": 82.02, + "end": 82.12, + "confidence": 0.931 + }, + { + "text": "qui", + "start": 82.12, + "end": 82.24, + "confidence": 0.976 + }, + { + "text": "amène", + "start": 82.24, + "end": 82.36, + "confidence": 0.973 + }, + { + "text": "aussi", + "start": 82.36, + "end": 82.56, + "confidence": 0.938 + }, + { + "text": "à", + "start": 82.56, + "end": 82.62, + "confidence": 0.941 + }, + { + "text": "des", + "start": 82.62, + "end": 82.72, + "confidence": 0.992 + }, + { + "text": "formes", + "start": 82.72, + "end": 82.9, + "confidence": 0.993 + }, + { + "text": "de", + "start": 82.9, + "end": 83.02, + 
"confidence": 0.998 + }, + { + "text": "rejet.", + "start": 83.02, + "end": 83.26, + "confidence": 0.883 + } + ] + }, + { + "id": 17, + "seek": 7684, + "start": 83.94, + "end": 87.8, + "text": " Donc, à objet inédit, rapport inédit.", + "tokens": [ + 7477, + 11, + 1531, + 14964, + 294, + 7811, + 270, + 11, + 18018, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.962, + "words": [ + { + "text": "Donc,", + "start": 83.94, + "end": 84.94, + "confidence": 0.971 + }, + { + "text": "à", + "start": 84.94, + "end": 84.98, + "confidence": 0.88 + }, + { + "text": "objet", + "start": 84.98, + "end": 85.36, + "confidence": 0.828 + }, + { + "text": "inédit,", + "start": 85.36, + "end": 86.56, + "confidence": 0.993 + }, + { + "text": "rapport", + "start": 86.56, + "end": 87.0, + "confidence": 0.981 + }, + { + "text": "inédit.", + "start": 87.0, + "end": 87.8, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 7684, + "start": 88.02, + "end": 95.14, + "text": " Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet.", + "tokens": [ + 3790, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 21724, + 38268, + 11, + 23139, + 1032, + 578, + 4198, + 22118, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 1030, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.972, + "words": [ + { + "text": "Et", + "start": 88.02, + "end": 88.48, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 88.48, + "end": 88.86, + "confidence": 0.975 + }, + { + "text": "rapport,", + "start": 88.86, + "end": 89.28, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.28, + "end": 89.56, + "confidence": 0.999 + }, + { + "text": "j'en", + "start": 
89.56, + "end": 89.84, + "confidence": 0.996 + }, + { + "text": "crois", + "start": 89.84, + "end": 89.88, + "confidence": 0.984 + }, + { + "text": "Nicolas,", + "start": 89.88, + "end": 90.54, + "confidence": 0.681 + }, + { + "text": "serait", + "start": 90.54, + "end": 90.94, + "confidence": 0.885 + }, + { + "text": "caractérisé", + "start": 90.94, + "end": 91.8, + "confidence": 0.993 + }, + { + "text": "par", + "start": 91.8, + "end": 92.12, + "confidence": 0.997 + }, + { + "text": "un", + "start": 92.12, + "end": 92.44, + "confidence": 0.997 + }, + { + "text": "mélange", + "start": 92.44, + "end": 92.98, + "confidence": 0.999 + }, + { + "text": "de", + "start": 92.98, + "end": 93.4, + "confidence": 0.998 + }, + { + "text": "dépendance", + "start": 93.4, + "end": 94.24, + "confidence": 0.953 + }, + { + "text": "et", + "start": 94.24, + "end": 94.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 94.54, + "end": 94.68, + "confidence": 0.999 + }, + { + "text": "rejet.", + "start": 94.68, + "end": 95.14, + "confidence": 0.993 + } + ] + }, + { + "id": 19, + "seek": 7684, + "start": 95.78, + "end": 102.86, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies", + "tokens": [ + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 1111, + 25349, + 7512, + 1030, + 368, + 9580, + 8969, + 313, + 2680, + 3269, + 371, + 530 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.941, + "words": [ + { + "text": "Bon,", + "start": 95.78, + "end": 96.38, + "confidence": 0.792 + }, + { + "text": "en", + "start": 96.38, + "end": 96.52, + "confidence": 0.998 + }, + { + "text": "vrai,", + "start": 96.52, + "end": 97.14, + "confidence": 0.994 + }, + { + "text": "il", + "start": 97.14, 
+ "end": 97.18, + "confidence": 0.998 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.58, + "confidence": 0.997 + }, + { + "text": "remonter", + "start": 97.58, + "end": 98.08, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.08, + "end": 98.6, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.6, + "end": 98.7, + "confidence": 0.768 + }, + { + "text": "finement", + "start": 98.7, + "end": 99.32, + "confidence": 0.849 + }, + { + "text": "toute", + "start": 99.32, + "end": 99.7, + "confidence": 0.984 + }, + { + "text": "l'histoire", + "start": 99.7, + "end": 100.06, + "confidence": 0.997 + }, + { + "text": "des", + "start": 100.06, + "end": 100.24, + "confidence": 0.998 + }, + { + "text": "objets", + "start": 100.24, + "end": 100.48, + "confidence": 0.999 + }, + { + "text": "techniques", + "start": 100.48, + "end": 101.02, + "confidence": 0.984 + }, + { + "text": "et", + "start": 101.02, + "end": 101.48, + "confidence": 0.527 + }, + { + "text": "de", + "start": 101.48, + "end": 101.68, + "confidence": 0.994 + }, + { + "text": "leur", + "start": 101.68, + "end": 101.84, + "confidence": 0.833 + }, + { + "text": "insertion", + "start": 101.84, + "end": 102.32, + "confidence": 0.994 + }, + { + "text": "dans", + "start": 102.32, + "end": 102.48, + "confidence": 0.99 + }, + { + "text": "nos", + "start": 102.48, + "end": 102.66, + "confidence": 0.998 + }, + { + "text": "vies", + "start": 102.66, + "end": 102.86, + "confidence": 0.998 + } + ] + }, + { + "id": 20, + "seek": 7684, + "start": 102.9, + "end": 105.74, + "text": " pour déterminer si ce rapport est totalement inédit.", + "tokens": [ + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.981, + "words": [ + { + "text": "pour", + "start": 102.9, + "end": 103.06, + 
"confidence": 0.822 + }, + { + "text": "déterminer", + "start": 103.06, + "end": 103.66, + "confidence": 0.997 + }, + { + "text": "si", + "start": 103.66, + "end": 103.76, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 103.76, + "end": 103.94, + "confidence": 0.997 + }, + { + "text": "rapport", + "start": 103.94, + "end": 104.26, + "confidence": 0.997 + }, + { + "text": "est", + "start": 104.26, + "end": 104.74, + "confidence": 0.998 + }, + { + "text": "totalement", + "start": 104.74, + "end": 105.3, + "confidence": 0.999 + }, + { + "text": "inédit.", + "start": 105.3, + "end": 105.74, + "confidence": 0.999 + } + ] + }, + { + "id": 21, + "seek": 10584, + "start": 106.1, + "end": 109.34, + "text": " Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment.", + "tokens": [ + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 408, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.94, + "words": [ + { + "text": "Mais", + "start": 106.1, + "end": 106.36, + "confidence": 0.947 + }, + { + "text": "j'ai", + "start": 106.36, + "end": 106.92, + "confidence": 0.941 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.36, + "confidence": 0.996 + }, + { + "text": "comme", + "start": 107.36, + "end": 107.56, + "confidence": 0.641 + }, + { + "text": "ça", + "start": 107.56, + "end": 107.82, + "confidence": 0.978 + }, + { + "text": "que", + "start": 107.82, + "end": 107.96, + "confidence": 0.976 + }, + { + "text": "Nicolas", + "start": 107.96, + "end": 108.46, + "confidence": 0.985 + }, + { + "text": "ne", + "start": 108.46, + "end": 108.66, + "confidence": 0.726 + }, + { + "text": "se", + "start": 108.66, + "end": 108.7, + "confidence": 0.991 + }, + { + "text": "trompe", + "start": 108.7, + "end": 108.88, + "confidence": 0.995 + }, + { + "text": 
"pas", + "start": 108.88, + "end": 109.08, + "confidence": 0.999 + }, + { + "text": "vraiment.", + "start": 109.08, + "end": 109.34, + "confidence": 0.991 + } + ] + }, + { + "id": 22, + "seek": 10584, + "start": 109.88, + "end": 114.98, + "text": " Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 8732, + 34081, + 631, + 1506, + 262, + 6000, + 11, + 1930, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.969, + "words": [ + { + "text": "Pour", + "start": 109.88, + "end": 110.08, + "confidence": 0.997 + }, + { + "text": "autant", + "start": 110.08, + "end": 110.24, + "confidence": 1.0 + }, + { + "text": "que", + "start": 110.24, + "end": 110.42, + "confidence": 0.988 + }, + { + "text": "je", + "start": 110.42, + "end": 110.52, + "confidence": 0.998 + }, + { + "text": "sache,", + "start": 110.52, + "end": 111.14, + "confidence": 0.963 + }, + { + "text": "il", + "start": 111.14, + "end": 111.18, + "confidence": 0.997 + }, + { + "text": "y", + "start": 111.18, + "end": 111.32, + "confidence": 0.992 + }, + { + "text": "a", + "start": 111.32, + "end": 111.36, + "confidence": 0.993 + }, + { + "text": "eu", + "start": 111.36, + "end": 111.68, + "confidence": 0.998 + }, + { + "text": "plein", + "start": 111.68, + "end": 111.88, + "confidence": 0.974 + }, + { + "text": "de", + "start": 111.88, + "end": 112.06, + "confidence": 0.997 + }, + { + "text": "discussions", + "start": 112.06, + "end": 112.6, + "confidence": 0.799 + }, + { + "text": "autour", + "start": 112.6, + "end": 112.94, + "confidence": 0.995 + }, + { + "text": "de", + "start": 112.94, + "end": 113.46, + "confidence": 0.997 + }, + { + "text": "la", + "start": 113.46, + "end": 113.52, + "confidence": 0.998 + }, + { 
+ "text": "voiture", + "start": 113.52, + "end": 113.86, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 113.86, + "end": 114.06, + "confidence": 0.765 + }, + { + "text": "même", + "start": 114.06, + "end": 114.44, + "confidence": 0.996 + }, + { + "text": "du", + "start": 114.44, + "end": 114.6, + "confidence": 0.995 + }, + { + "text": "téléphone.", + "start": 114.6, + "end": 114.98, + "confidence": 0.999 + } + ] + }, + { + "id": 23, + "seek": 10584, + "start": 115.34, + "end": 119.84, + "text": " Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre.", + "tokens": [ + 6313, + 635, + 45768, + 719, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 7477, + 476, + 319, + 7108, + 2107, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.972, + "words": [ + { + "text": "Mais", + "start": 115.34, + "end": 115.72, + "confidence": 0.994 + }, + { + "text": "la", + "start": 115.72, + "end": 116.02, + "confidence": 0.937 + }, + { + "text": "dépendance", + "start": 116.02, + "end": 116.4, + "confidence": 0.997 + }, + { + "text": "n'était", + "start": 116.4, + "end": 116.62, + "confidence": 0.994 + }, + { + "text": "pas", + "start": 116.62, + "end": 117.0, + "confidence": 0.998 + }, + { + "text": "du", + "start": 117.0, + "end": 117.16, + "confidence": 0.995 + }, + { + "text": "même", + "start": 117.16, + "end": 117.46, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 117.46, + "end": 117.78, + "confidence": 0.999 + }, + { + "text": "Donc", + "start": 117.78, + "end": 117.98, + "confidence": 0.804 + }, + { + "text": "le", + "start": 117.98, + "end": 118.34, + "confidence": 0.715 + }, + { + "text": "rejet", + "start": 118.34, + "end": 118.62, + "confidence": 0.999 + }, + { + "text": "non", + "start": 118.62, + "end": 
118.78, + "confidence": 0.975 + }, + { + "text": "plus", + "start": 118.78, + "end": 118.94, + "confidence": 0.996 + }, + { + "text": "n'était", + "start": 118.94, + "end": 119.12, + "confidence": 0.987 + }, + { + "text": "pas", + "start": 119.12, + "end": 119.3, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.3, + "end": 119.38, + "confidence": 0.995 + }, + { + "text": "même", + "start": 119.38, + "end": 119.56, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 119.56, + "end": 119.84, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 10584, + "start": 119.98, + "end": 123.02, + "text": " On peut adorer sa bagnole, en avoir besoin pour plein de choses.", + "tokens": [ + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 1771, + 306, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.991, + "words": [ + { + "text": "On", + "start": 119.98, + "end": 120.18, + "confidence": 0.996 + }, + { + "text": "peut", + "start": 120.18, + "end": 120.38, + "confidence": 0.997 + }, + { + "text": "adorer", + "start": 120.38, + "end": 120.66, + "confidence": 0.99 + }, + { + "text": "sa", + "start": 120.66, + "end": 120.88, + "confidence": 0.985 + }, + { + "text": "bagnole,", + "start": 120.88, + "end": 121.46, + "confidence": 0.984 + }, + { + "text": "en", + "start": 121.46, + "end": 121.56, + "confidence": 0.989 + }, + { + "text": "avoir", + "start": 121.56, + "end": 121.74, + "confidence": 0.998 + }, + { + "text": "besoin", + "start": 121.74, + "end": 122.1, + "confidence": 0.999 + }, + { + "text": "pour", + "start": 122.1, + "end": 122.34, + "confidence": 0.987 + }, + { + "text": "plein", + "start": 122.34, + "end": 122.68, + "confidence": 0.989 + }, + { + "text": "de", + "start": 122.68, + "end": 122.8, + "confidence": 0.998 + }, + { + "text": "choses.", + 
"start": 122.8, + "end": 123.02, + "confidence": 0.989 + } + ] + }, + { + "id": 25, + "seek": 10584, + "start": 123.28, + "end": 126.36, + "text": " Et bien, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 3790, + 3610, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.895, + "words": [ + { + "text": "Et", + "start": 123.28, + "end": 123.46, + "confidence": 0.667 + }, + { + "text": "bien,", + "start": 123.46, + "end": 123.86, + "confidence": 0.469 + }, + { + "text": "le", + "start": 123.86, + "end": 123.98, + "confidence": 0.996 + }, + { + "text": "soir,", + "start": 123.98, + "end": 124.68, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 124.68, + "end": 124.9, + "confidence": 0.997 + }, + { + "text": "on", + "start": 124.9, + "end": 125.02, + "confidence": 0.998 + }, + { + "text": "va", + "start": 125.02, + "end": 125.14, + "confidence": 0.996 + }, + { + "text": "se", + "start": 125.14, + "end": 125.38, + "confidence": 0.988 + }, + { + "text": "coucher,", + "start": 125.38, + "end": 125.8, + "confidence": 0.987 + }, + { + "text": "on", + "start": 125.8, + "end": 126.02, + "confidence": 0.995 + }, + { + "text": "la", + "start": 126.02, + "end": 126.22, + "confidence": 0.802 + }, + { + "text": "laisse.", + "start": 126.22, + "end": 126.36, + "confidence": 0.999 + } + ] + }, + { + "id": 26, + "seek": 10584, + "start": 126.98, + "end": 130.48, + "text": " On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes.", + "tokens": [ + 1282, + 408, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 6932, + 322, + 871, + 1609, + 7997, + 11, + 322, + 408, + 287, + 6, + 443, + 76, + 18832, + 1736, + 1609, + 13228, + 1521, + 279, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + 
"compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.914, + "words": [ + { + "text": "On", + "start": 126.98, + "end": 127.32, + "confidence": 0.954 + }, + { + "text": "ne", + "start": 127.32, + "end": 127.36, + "confidence": 0.803 + }, + { + "text": "l'a", + "start": 127.36, + "end": 127.48, + "confidence": 0.974 + }, + { + "text": "pas", + "start": 127.48, + "end": 127.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 127.68, + "end": 127.8, + "confidence": 0.996 + }, + { + "text": "la", + "start": 127.8, + "end": 128.06, + "confidence": 0.994 + }, + { + "text": "main", + "start": 128.06, + "end": 128.26, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 128.26, + "end": 128.44, + "confidence": 0.935 + }, + { + "text": "on", + "start": 128.44, + "end": 128.62, + "confidence": 0.997 + }, + { + "text": "est", + "start": 128.62, + "end": 128.68, + "confidence": 0.993 + }, + { + "text": "au", + "start": 128.68, + "end": 129.04, + "confidence": 0.984 + }, + { + "text": "lit,", + "start": 129.04, + "end": 129.14, + "confidence": 0.999 + }, + { + "text": "on", + "start": 129.14, + "end": 129.26, + "confidence": 0.427 + }, + { + "text": "ne", + "start": 129.26, + "end": 129.3, + "confidence": 0.963 + }, + { + "text": "l'emmène", + "start": 129.3, + "end": 129.5, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 129.5, + "end": 129.68, + "confidence": 0.997 + }, + { + "text": "au", + "start": 129.68, + "end": 129.86, + "confidence": 0.674 + }, + { + "text": "chiottes.", + "start": 129.86, + "end": 130.48, + "confidence": 0.828 + } + ] + }, + { + "id": 27, + "seek": 13084, + "start": 130.86, + "end": 136.9, + "text": " On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain.", + "tokens": [ + 1282, + 45913, + 7418, + 45045, + 15797, + 971, + 1872, + 275, + 2851, + 1398, + 1956, + 8073, + 1001, + 635, + 34207, + 368, + 
47159, + 17338, + 2251, + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.955, + "words": [ + { + "text": "On", + "start": 130.86, + "end": 131.04, + "confidence": 0.983 + }, + { + "text": "pouvait", + "start": 131.04, + "end": 131.28, + "confidence": 0.989 + }, + { + "text": "être", + "start": 131.28, + "end": 131.48, + "confidence": 0.996 + }, + { + "text": "énervé", + "start": 131.48, + "end": 132.22, + "confidence": 0.906 + }, + { + "text": "par", + "start": 132.22, + "end": 132.44, + "confidence": 0.991 + }, + { + "text": "son", + "start": 132.44, + "end": 132.7, + "confidence": 0.998 + }, + { + "text": "môme", + "start": 132.7, + "end": 133.1, + "confidence": 0.832 + }, + { + "text": "qui", + "start": 133.1, + "end": 133.34, + "confidence": 0.919 + }, + { + "text": "occupait", + "start": 133.34, + "end": 133.76, + "confidence": 0.991 + }, + { + "text": "la", + "start": 133.76, + "end": 133.8, + "confidence": 0.992 + }, + { + "text": "ligne", + "start": 133.8, + "end": 134.08, + "confidence": 0.999 + }, + { + "text": "de", + "start": 134.08, + "end": 134.14, + "confidence": 0.997 + }, + { + "text": "téléphone", + "start": 134.14, + "end": 134.6, + "confidence": 0.985 + }, + { + "text": "pendant", + "start": 134.6, + "end": 134.82, + "confidence": 0.981 + }, + { + "text": "une", + "start": 134.82, + "end": 135.2, + "confidence": 0.87 + }, + { + "text": "heure", + "start": 135.2, + "end": 135.36, + "confidence": 0.998 + }, + { + "text": "chaque", + "start": 135.36, + "end": 135.54, + "confidence": 0.991 + }, + { + "text": "soir", + "start": 135.54, + "end": 135.8, + "confidence": 0.996 + }, + { + "text": "pour", + "start": 135.8, + "end": 135.96, + "confidence": 0.79 + }, + { + "text": "discuter", + "start": 135.96, + "end": 136.28, + "confidence": 0.997 + 
}, + { + "text": "avec", + "start": 136.28, + "end": 136.48, + "confidence": 0.996 + }, + { + "text": "un", + "start": 136.48, + "end": 136.6, + "confidence": 0.999 + }, + { + "text": "copain.", + "start": 136.6, + "end": 136.9, + "confidence": 0.998 + } + ] + }, + { + "id": 28, + "seek": 13084, + "start": 137.28, + "end": 141.88, + "text": " Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui,", + "tokens": [ + 6313, + 2788, + 408, + 725, + 15750, + 35235, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 1769, + 5698, + 275, + 2851, + 1398, + 14023, + 6, + 10556, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.959, + "words": [ + { + "text": "Mais", + "start": 137.28, + "end": 137.46, + "confidence": 0.993 + }, + { + "text": "ça", + "start": 137.46, + "end": 137.68, + "confidence": 0.938 + }, + { + "text": "ne", + "start": 137.68, + "end": 137.94, + "confidence": 0.998 + }, + { + "text": "ressemblait", + "start": 137.94, + "end": 138.4, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 138.4, + "end": 138.76, + "confidence": 0.995 + }, + { + "text": "à", + "start": 138.76, + "end": 138.94, + "confidence": 0.988 + }, + { + "text": "ce", + "start": 138.94, + "end": 138.98, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 138.98, + "end": 139.1, + "confidence": 0.986 + }, + { + "text": "peut", + "start": 139.1, + "end": 139.48, + "confidence": 0.988 + }, + { + "text": "ressentir", + "start": 139.48, + "end": 140.12, + "confidence": 0.997 + }, + { + "text": "à", + "start": 140.12, + "end": 140.32, + "confidence": 0.498 + }, + { + "text": "voir", + "start": 140.32, + "end": 140.46, + "confidence": 0.855 + }, + { + "text": "ce", + "start": 140.46, + "end": 140.68, + "confidence": 0.989 + }, + { + "text": "même", + "start": 140.68, + "end": 140.94, 
+ "confidence": 0.984 + }, + { + "text": "môme", + "start": 140.94, + "end": 141.34, + "confidence": 0.998 + }, + { + "text": "aujourd'hui,", + "start": 141.34, + "end": 141.88, + "confidence": 0.988 + } + ] + }, + { + "id": 29, + "seek": 13084, + "start": 142.14, + "end": 146.3, + "text": " continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe,", + "tokens": [ + 2354, + 285, + 1712, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 1511, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 15165, + 49523, + 454, + 391, + 716, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.963, + "words": [ + { + "text": "continuellement", + "start": 142.14, + "end": 142.94, + "confidence": 0.971 + }, + { + "text": "avec", + "start": 142.94, + "end": 143.18, + "confidence": 0.805 + }, + { + "text": "son", + "start": 143.18, + "end": 143.38, + "confidence": 0.995 + }, + { + "text": "smartphone", + "start": 143.38, + "end": 143.76, + "confidence": 0.978 + }, + { + "text": "dans", + "start": 143.76, + "end": 143.94, + "confidence": 0.979 + }, + { + "text": "la", + "start": 143.94, + "end": 144.0, + "confidence": 0.996 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.36, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 144.36, + "end": 144.52, + "confidence": 0.835 + }, + { + "text": "si", + "start": 144.52, + "end": 144.64, + "confidence": 0.975 + }, + { + "text": "c'était", + "start": 144.64, + "end": 144.82, + "confidence": 0.991 + }, + { + "text": "une", + "start": 144.82, + "end": 145.06, + "confidence": 0.989 + }, + { + "text": "sorte", + "start": 145.06, + "end": 145.22, + "confidence": 0.997 + }, + { + "text": "de", + "start": 145.22, + "end": 145.3, + "confidence": 0.982 + }, + { + "text": "pacemaker", + "start": 145.3, + "end": 145.82, + "confidence": 0.917 + }, + { + "text": 
"externe,", + "start": 145.82, + "end": 146.3, + "confidence": 0.992 + } + ] + }, + { + "id": 30, + "seek": 13084, + "start": 146.34, + "end": 148.84, + "text": " comme si le lâcher allait entraîner sa mort immédiate.", + "tokens": [ + 5173, + 1511, + 476, + 48835, + 6759, + 439, + 1001, + 22284, + 7517, + 1193, + 601, + 6599, + 3397, + 526, + 4504, + 473, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.989, + "words": [ + { + "text": "comme", + "start": 146.34, + "end": 146.58, + "confidence": 0.996 + }, + { + "text": "si", + "start": 146.58, + "end": 146.76, + "confidence": 0.994 + }, + { + "text": "le", + "start": 146.76, + "end": 146.84, + "confidence": 0.997 + }, + { + "text": "lâcher", + "start": 146.84, + "end": 147.36, + "confidence": 0.969 + }, + { + "text": "allait", + "start": 147.36, + "end": 147.56, + "confidence": 0.993 + }, + { + "text": "entraîner", + "start": 147.56, + "end": 147.86, + "confidence": 0.978 + }, + { + "text": "sa", + "start": 147.86, + "end": 148.0, + "confidence": 0.999 + }, + { + "text": "mort", + "start": 148.0, + "end": 148.22, + "confidence": 0.998 + }, + { + "text": "immédiate.", + "start": 148.22, + "end": 148.84, + "confidence": 0.997 + } + ] + }, + { + "id": 31, + "seek": 13084, + "start": 149.04, + "end": 151.96, + "text": " Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi.", + "tokens": [ + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 2851, + 1398, + 11, + 2420, + 269, + 6, + 377, + 24724, + 1323, + 712, + 2016, + 4666, + 6212, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.974, + "words": [ + { + "text": "Bon,", + "start": 149.04, + "end": 149.28, + "confidence": 0.918 + }, + { + "text": "je", + "start": 149.28, + "end": 149.32, + 
"confidence": 0.934 + }, + { + "text": "dis", + "start": 149.32, + "end": 149.46, + "confidence": 0.988 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.994 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.74, + "confidence": 0.997 + }, + { + "text": "le", + "start": 149.74, + "end": 149.88, + "confidence": 0.995 + }, + { + "text": "môme,", + "start": 149.88, + "end": 150.32, + "confidence": 0.998 + }, + { + "text": "mais", + "start": 150.32, + "end": 150.52, + "confidence": 0.791 + }, + { + "text": "c'est", + "start": 150.52, + "end": 150.82, + "confidence": 0.981 + }, + { + "text": "évidemment", + "start": 150.82, + "end": 151.14, + "confidence": 0.98 + }, + { + "text": "valable", + "start": 151.14, + "end": 151.48, + "confidence": 0.998 + }, + { + "text": "pour", + "start": 151.48, + "end": 151.62, + "confidence": 0.997 + }, + { + "text": "nous", + "start": 151.62, + "end": 151.76, + "confidence": 0.999 + }, + { + "text": "aussi.", + "start": 151.76, + "end": 151.96, + "confidence": 0.996 + } + ] + }, + { + "id": 32, + "seek": 13084, + "start": 152.34, + "end": 158.22, + "text": " Donc, rapport inédit. D'accord. 
Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais?", + "tokens": [ + 7477, + 11, + 18018, + 294, + 7811, + 270, + 13, + 413, + 6, + 19947, + 13, + 6313, + 19934, + 257, + 12, + 83, + 12, + 266, + 287, + 6, + 36107, + 421, + 6, + 266, + 297, + 6, + 268, + 1333, + 4271, + 14540, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.948, + "words": [ + { + "text": "Donc,", + "start": 152.34, + "end": 153.46, + "confidence": 0.991 + }, + { + "text": "rapport", + "start": 153.46, + "end": 153.66, + "confidence": 0.976 + }, + { + "text": "inédit.", + "start": 153.66, + "end": 154.24, + "confidence": 0.996 + }, + { + "text": "D'accord.", + "start": 154.24, + "end": 155.48, + "confidence": 0.978 + }, + { + "text": "Mais", + "start": 155.48, + "end": 155.82, + "confidence": 0.557 + }, + { + "text": "pourquoi", + "start": 155.82, + "end": 156.32, + "confidence": 0.994 + }, + { + "text": "a-t-on", + "start": 156.32, + "end": 156.68, + "confidence": 0.94 + }, + { + "text": "l'impression", + "start": 156.68, + "end": 157.06, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.26, + "confidence": 0.995 + }, + { + "text": "n'en", + "start": 157.26, + "end": 157.44, + "confidence": 0.878 + }, + { + "text": "sortira", + "start": 157.44, + "end": 157.9, + "confidence": 0.989 + }, + { + "text": "jamais?", + "start": 157.9, + "end": 158.22, + "confidence": 0.998 + } + ] + }, + { + "id": 33, + "seek": 15884, + "start": 158.86, + "end": 165.32, + "text": " Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux?", + "tokens": [ + 4410, + 12, + 384, + 421, + 6, + 388, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 8603, + 484, + 388, + 3551, + 303, + 3409, + 2449, + 1030, + 1026, + 
14923, + 1925, + 11, + 1030, + 1026, + 14923, + 1925, + 6992, + 631, + 3551, + 303, + 3409, + 2449, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.079788723507443, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.4980162531137466e-05, + "confidence": 0.962, + "words": [ + { + "text": "Est-ce", + "start": 158.86, + "end": 159.34, + "confidence": 0.981 + }, + { + "text": "qu'il", + "start": 159.34, + "end": 159.46, + "confidence": 0.997 + }, + { + "text": "faut", + "start": 159.46, + "end": 159.62, + "confidence": 0.999 + }, + { + "text": "en", + "start": 159.62, + "end": 159.78, + "confidence": 0.97 + }, + { + "text": "remettre", + "start": 159.78, + "end": 160.12, + "confidence": 0.999 + }, + { + "text": "la", + "start": 160.12, + "end": 160.34, + "confidence": 0.998 + }, + { + "text": "faute", + "start": 160.34, + "end": 160.66, + "confidence": 0.986 + }, + { + "text": "sur", + "start": 160.66, + "end": 160.94, + "confidence": 0.995 + }, + { + "text": "les", + "start": 160.94, + "end": 161.28, + "confidence": 0.995 + }, + { + "text": "gens", + "start": 161.28, + "end": 161.44, + "confidence": 1.0 + }, + { + "text": "qui", + "start": 161.44, + "end": 161.58, + "confidence": 0.984 + }, + { + "text": "ont", + "start": 161.58, + "end": 161.72, + "confidence": 0.998 + }, + { + "text": "créé", + "start": 161.72, + "end": 162.3, + "confidence": 0.99 + }, + { + "text": "cet", + "start": 162.3, + "end": 162.46, + "confidence": 0.852 + }, + { + "text": "outil", + "start": 162.46, + "end": 162.78, + "confidence": 0.99 + }, + { + "text": "merveilleux", + "start": 162.78, + "end": 163.34, + "confidence": 0.994 + }, + { + "text": "et", + "start": 163.34, + "end": 163.5, + "confidence": 0.954 + }, + { + "text": "diabolique,", + "start": 163.5, + "end": 163.86, + "confidence": 0.992 + }, + { + "text": "et", + "start": 163.86, + "end": 163.92, + "confidence": 0.54 + }, + { + "text": "diabolique", + "start": 163.92, + "end": 164.4, + "confidence": 0.951 + }, 
+ { + "text": "parce", + "start": 164.4, + "end": 164.66, + "confidence": 0.703 + }, + { + "text": "que", + "start": 164.66, + "end": 164.84, + "confidence": 0.99 + }, + { + "text": "merveilleux?", + "start": 164.84, + "end": 165.32, + "confidence": 0.997 + } + ] + }, + { + "id": 34, + "seek": 15884, + "start": 166.34, + "end": 168.82, + "text": " Les économistes parlent de dépendance du sentier.", + "tokens": [ + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 2279, + 811, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.079788723507443, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.4980162531137466e-05, + "confidence": 0.984, + "words": [ + { + "text": "Les", + "start": 166.34, + "end": 167.04, + "confidence": 0.926 + }, + { + "text": "économistes", + "start": 167.04, + "end": 167.48, + "confidence": 0.998 + }, + { + "text": "parlent", + "start": 167.48, + "end": 167.68, + "confidence": 0.995 + }, + { + "text": "de", + "start": 167.68, + "end": 167.82, + "confidence": 0.995 + }, + { + "text": "dépendance", + "start": 167.82, + "end": 168.36, + "confidence": 0.985 + }, + { + "text": "du", + "start": 168.36, + "end": 168.5, + "confidence": 0.997 + }, + { + "text": "sentier.", + "start": 168.5, + "end": 168.82, + "confidence": 0.978 + } + ] + }, + { + "id": 35, + "seek": 16884, + "start": 168.86, + "end": 177.42, + "text": " C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "tokens": [ + 50364, + 383, + 6, + 377, + 287, + 6, + 34281, + 421, + 6, + 266, + 871, + 1022, + 517, + 2279, + 811, + 1956, + 257, + 8862, + 4823, + 455, + 2081, + 11, + 12703, + 40005, + 9020, + 518, + 465, + 8368, + 394, + 30677, + 11, + 12703, + 465, + 40763, + 29492, + 730, + 4232, + 279, + 11, + 465, + 40763, + 29492, + 2251, + 6358, + 42379, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.10415018598238628, + 
"compression_ratio": 1.3916666666666666, + "no_speech_prob": 3.958350498578511e-05, + "confidence": 0.93, + "words": [ + { + "text": "C'est", + "start": 168.86, + "end": 169.1, + "confidence": 0.952 + }, + { + "text": "l'idée", + "start": 169.1, + "end": 169.34, + "confidence": 0.992 + }, + { + "text": "qu'on", + "start": 169.34, + "end": 169.7, + "confidence": 0.825 + }, + { + "text": "est", + "start": 169.7, + "end": 169.88, + "confidence": 0.631 + }, + { + "text": "sur", + "start": 169.88, + "end": 170.0, + "confidence": 0.993 + }, + { + "text": "un", + "start": 170.0, + "end": 170.54, + "confidence": 0.998 + }, + { + "text": "sentier", + "start": 170.54, + "end": 170.78, + "confidence": 0.99 + }, + { + "text": "qui", + "start": 170.78, + "end": 170.84, + "confidence": 0.961 + }, + { + "text": "a", + "start": 170.84, + "end": 170.94, + "confidence": 0.98 + }, + { + "text": "été", + "start": 170.94, + "end": 171.12, + "confidence": 0.996 + }, + { + "text": "établi,", + "start": 171.12, + "end": 171.9, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 171.9, + "end": 172.12, + "confidence": 0.989 + }, + { + "text": "volontairement", + "start": 172.12, + "end": 172.72, + "confidence": 0.965 + }, + { + "text": "en", + "start": 172.72, + "end": 172.8, + "confidence": 0.941 + }, + { + "text": "marchant", + "start": 172.8, + "end": 173.06, + "confidence": 0.997 + }, + { + "text": "dessus,", + "start": 173.06, + "end": 174.24, + "confidence": 0.972 + }, + { + "text": "soit", + "start": 174.24, + "end": 174.92, + "confidence": 0.996 + }, + { + "text": "en", + "start": 174.92, + "end": 175.36, + "confidence": 0.991 + }, + { + "text": "définissant", + "start": 175.36, + "end": 175.5, + "confidence": 0.983 + }, + { + "text": "des", + "start": 175.5, + "end": 175.76, + "confidence": 0.984 + }, + { + "text": "bornes,", + "start": 175.76, + "end": 176.04, + "confidence": 0.975 + }, + { + "text": "en", + "start": 176.04, + "end": 176.08, + "confidence": 0.68 + }, + { 
+ "text": "définissant", + "start": 176.08, + "end": 176.58, + "confidence": 0.997 + }, + { + "text": "une", + "start": 176.58, + "end": 176.82, + "confidence": 0.944 + }, + { + "text": "signalétique.", + "start": 176.82, + "end": 177.42, + "confidence": 0.642 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/bonjour.wav.words.json b/tests/expected/medium_fr/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..0ceff460cfed744a26ce7dd494b00fb39619a892 --- /dev/null +++ b/tests/expected/medium_fr/bonjour.wav.words.json @@ -0,0 +1,32 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.94, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50402 + ], + "temperature": 0.0, + "avg_logprob": -0.7047327041625977, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.08847080171108246, + "confidence": 0.964, + "words": [ + { + "text": "Bonjour !", + "start": 0.14, + "end": 0.94, + "confidence": 0.964 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/bonjour_vous_allez_bien.mp3.words.json b/tests/expected/medium_fr/bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..8afbcdb52fcce8751c2a6a40c480780b37bcd8e3 --- /dev/null +++ b/tests/expected/medium_fr/bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,134 @@ +{ + "text": " Bonjour ! Est-ce que vous allez bien ? Bonjour ! Est-ce que vous allez bien ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 3.46, + "text": " Bonjour ! 
Est-ce que vous allez bien ?", + "tokens": [ + 50364, + 25431, + 2298, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.36153463216928333, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.07168596982955933, + "confidence": 0.937, + "words": [ + { + "text": "Bonjour !", + "start": 0.42, + "end": 1.92, + "confidence": 0.874 + }, + { + "text": "Est-ce", + "start": 1.92, + "end": 2.16, + "confidence": 0.886 + }, + { + "text": "que", + "start": 2.16, + "end": 2.24, + "confidence": 0.987 + }, + { + "text": "vous", + "start": 2.24, + "end": 2.38, + "confidence": 0.996 + }, + { + "text": "allez", + "start": 2.38, + "end": 2.58, + "confidence": 0.99 + }, + { + "text": "bien ?", + "start": 2.58, + "end": 3.46, + "confidence": 0.999 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 32.94, + "end": 35.86, + "text": " Bonjour ! Est-ce que vous allez bien ?", + "tokens": [ + 50364, + 25431, + 2298, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.3062671698056735, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.40451109409332275, + "confidence": 0.933, + "words": [ + { + "text": "Bonjour !", + "start": 32.94, + "end": 34.44, + "confidence": 0.741 + }, + { + "text": "Est-ce", + "start": 34.44, + "end": 34.7, + "confidence": 0.921 + }, + { + "text": "que", + "start": 34.7, + "end": 34.76, + "confidence": 0.996 + }, + { + "text": "vous", + "start": 34.76, + "end": 34.9, + "confidence": 0.998 + }, + { + "text": "allez", + "start": 34.9, + "end": 35.1, + "confidence": 0.997 + }, + { + "text": "bien ?", + "start": 35.1, + "end": 35.86, + "confidence": 0.999 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/empty.mp3.words.json b/tests/expected/medium_fr/empty.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..b2962749bd0319081c8fcb0ff1149977742d21ee --- /dev/null +++ b/tests/expected/medium_fr/empty.mp3.words.json @@ -0,0 +1,31 @@ +{ + "text": " ...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.08, + "end": 1.58, + "text": " ...", + "tokens": [ + 50364, + 1097, + 50518 + ], + "temperature": 0.0, + "avg_logprob": -0.8883877396583557, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.4414949119091034, + "confidence": 0.127, + "words": [ + { + "text": "...", + "start": 0.08, + "end": 1.58, + "confidence": 0.127 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/gaenswein15.mp3.words.json b/tests/expected/medium_fr/gaenswein15.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..c4336bc1a0b547f625582250878be0e52d1ebff2 --- /dev/null +++ b/tests/expected/medium_fr/gaenswein15.mp3.words.json @@ -0,0 +1,31 @@ +{ + "text": " ...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 8.46, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51122 + ], + "temperature": 0.0, + "avg_logprob": -1.0777095556259155, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.2541808485984802, + "confidence": 0.133, + "words": [ + { + "text": "...", + "start": 0.0, + "end": 8.46, + "confidence": 0.133 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/gloria.mp3.words.json b/tests/expected/medium_fr/gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..c7d918e8138b7683ef7d09aef2d2459394bd775e --- /dev/null +++ b/tests/expected/medium_fr/gloria.mp3.words.json @@ -0,0 +1,31 @@ +{ + "text": " ...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 20.18, + "end": 20.2, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51392 + ], + "temperature": 0.0, + "avg_logprob": -1.440472960472107, + "compression_ratio": 
0.2727272727272727, + "no_speech_prob": 0.05056881532073021, + "confidence": 0.159, + "words": [ + { + "text": "...", + "start": 20.18, + "end": 20.2, + "confidence": 0.159 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/laugh1.mp3.words.json b/tests/expected/medium_fr/laugh1.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..889b70a4559c01e100ae0f42bf7438784b35d4c0 --- /dev/null +++ b/tests/expected/medium_fr/laugh1.mp3.words.json @@ -0,0 +1,32 @@ +{ + "text": " Rires", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 1.7, + "text": " Rires", + "tokens": [ + 50364, + 497, + 3145, + 50576 + ], + "temperature": 0.0, + "avg_logprob": -1.0559712409973145, + "compression_ratio": 0.38461538461538464, + "no_speech_prob": 0.5892603397369385, + "confidence": 0.254, + "words": [ + { + "text": "Rires", + "start": 0.0, + "end": 1.7, + "confidence": 0.254 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/laugh2.mp3.words.json b/tests/expected/medium_fr/laugh2.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..30c27e9d9c4520aeda635cb4c2b650545df77bdc --- /dev/null +++ b/tests/expected/medium_fr/laugh2.mp3.words.json @@ -0,0 +1,32 @@ +{ + "text": " Hehehe", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.22, + "end": 0.64, + "text": " Hehehe", + "tokens": [ + 50364, + 634, + 23500, + 50398 + ], + "temperature": 0.0, + "avg_logprob": -1.2395231246948242, + "compression_ratio": 0.5, + "no_speech_prob": 0.3666148781776428, + "confidence": 0.173, + "words": [ + { + "text": "Hehehe", + "start": 0.22, + "end": 0.64, + "confidence": 0.173 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/punctuations.mp3.words.json b/tests/expected/medium_fr/punctuations.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..d5f1765f3c8fa2266342bd52ade9f7749c6a73b8 --- /dev/null +++ b/tests/expected/medium_fr/punctuations.mp3.words.json @@ -0,0 +1,68 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 2.76, + "text": " Dis-moi, est-ce que l'avion vole ?", + "tokens": [ + 50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 6, + 706, + 313, + 49877, + 2506, + 50496 + ], + "temperature": 0.0, + "avg_logprob": -0.26349667941822724, + "compression_ratio": 0.8095238095238095, + "no_speech_prob": 0.03940592333674431, + "confidence": 0.928, + "words": [ + { + "text": "Dis-moi,", + "start": 0.38, + "end": 1.1, + "confidence": 0.807 + }, + { + "text": "est-ce", + "start": 1.28, + "end": 1.5, + "confidence": 0.968 + }, + { + "text": "que", + "start": 1.5, + "end": 1.66, + "confidence": 0.978 + }, + { + "text": "l'avion", + "start": 1.66, + "end": 2.04, + "confidence": 0.993 + }, + { + "text": "vole ?", + "start": 2.04, + "end": 2.76, + "confidence": 0.898 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/radio_short.mp3.words.json b/tests/expected/medium_fr/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..1d484bc1a745acb54519f41c47bcbc945aa2cf25 --- /dev/null +++ b/tests/expected/medium_fr/radio_short.mp3.words.json @@ -0,0 +1,1642 @@ +{ + "text": " Le plus important au poker ce ne sont pas les cartes, c'est ce que vous en faites. Winamax, la référence du poker en ligne. Bonsoir à toutes et tous, vous êtes sur BFM TV, nous sommes en direct, c'est bien sûr BFM story avec tout ce qui fait l'actualité. Durant 60 minutes ce sont des gros plans, des analyses, des réactions que nous vous proposons. Comment Eric Verth peut-il encore soutenir la réforme des retraites alors qu'il est englué dans sa propre affaire, l'affaire Verth-Bettancourt ? 
Question posée par les leaders de la CFDT et la CGT. Réponse de Nicolas Sarkozy, Eric Verth portera le débat sur les retraites, on en parle dans BFM story avec le numéro de la CFDT. Et puis il y a une bataille qui a démarré, celle entre Marine Le Pen et Bruno Gognich, la bataille de la succession de Jean-Marie Le Pen à la tête du Front National. La tournée de campagne de Marine Le Pen commence aujourd'hui dans le Var, Marine Le Pen sera en direct dans BFM story. Restez avec nous Marine Le Pen dans moins de 3 minutes, à tout de suite. ...", + "segments": [ + { + "id": 0, + "seek": 3000, + "start": 30.88, + "end": 34.26, + "text": " Le plus important au poker ce ne sont pas les cartes, c'est ce que vous en faites.", + "tokens": [ + 50364, + 1456, + 1804, + 1021, + 1609, + 36863, + 1769, + 408, + 4900, + 1736, + 1512, + 5467, + 279, + 11, + 269, + 6, + 377, + 1769, + 631, + 2630, + 465, + 29902, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.1931878740542403, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5573714971542358, + "confidence": 0.92, + "words": [ + { + "text": "Le", + "start": 30.88, + "end": 31.02, + "confidence": 0.536 + }, + { + "text": "plus", + "start": 31.02, + "end": 31.18, + "confidence": 0.99 + }, + { + "text": "important", + "start": 31.18, + "end": 31.5, + "confidence": 0.993 + }, + { + "text": "au", + "start": 31.5, + "end": 31.62, + "confidence": 0.946 + }, + { + "text": "poker", + "start": 31.62, + "end": 31.84, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 31.84, + "end": 32.0, + "confidence": 0.579 + }, + { + "text": "ne", + "start": 32.0, + "end": 32.06, + "confidence": 0.961 + }, + { + "text": "sont", + "start": 32.06, + "end": 32.16, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 32.16, + "end": 32.3, + "confidence": 0.991 + }, + { + "text": "les", + "start": 32.3, + "end": 32.46, + "confidence": 0.969 + }, + { + "text": "cartes,", + "start": 32.46, + "end": 33.26, + "confidence": 
0.996 + }, + { + "text": "c'est", + "start": 33.48, + "end": 33.62, + "confidence": 0.95 + }, + { + "text": "ce", + "start": 33.62, + "end": 33.66, + "confidence": 0.99 + }, + { + "text": "que", + "start": 33.66, + "end": 33.76, + "confidence": 0.991 + }, + { + "text": "vous", + "start": 33.76, + "end": 33.9, + "confidence": 0.991 + }, + { + "text": "en", + "start": 33.9, + "end": 34.06, + "confidence": 0.982 + }, + { + "text": "faites.", + "start": 34.06, + "end": 34.26, + "confidence": 0.894 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 36.26, + "end": 38.74, + "text": " Winamax, la référence du poker en ligne.", + "tokens": [ + 50664, + 10427, + 2404, + 87, + 11, + 635, + 30170, + 41635, + 1581, + 36863, + 465, + 34207, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.1931878740542403, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5573714971542358, + "confidence": 0.967, + "words": [ + { + "text": "Winamax,", + "start": 36.26, + "end": 37.04, + "confidence": 0.905 + }, + { + "text": "la", + "start": 37.26, + "end": 37.36, + "confidence": 0.987 + }, + { + "text": "référence", + "start": 37.36, + "end": 37.78, + "confidence": 0.998 + }, + { + "text": "du", + "start": 37.78, + "end": 37.96, + "confidence": 0.992 + }, + { + "text": "poker", + "start": 37.96, + "end": 38.22, + "confidence": 0.998 + }, + { + "text": "en", + "start": 38.22, + "end": 38.44, + "confidence": 0.993 + }, + { + "text": "ligne.", + "start": 38.44, + "end": 38.74, + "confidence": 0.993 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 44.9, + "end": 51.38, + "text": " Bonsoir à toutes et tous, vous êtes sur BFM TV, nous sommes en direct, c'est bien sûr BFM story avec tout ce qui fait l'actualité.", + "tokens": [ + 51114, + 7368, + 539, + 347, + 1531, + 14437, + 1030, + 8317, + 11, + 2630, + 18935, + 1022, + 363, + 37, + 44, + 3558, + 11, + 4666, + 25232, + 465, + 2047, + 11, + 269, + 6, + 377, + 3610, + 18143, + 363, + 37, + 44, + 1657, + 4163, + 
3486, + 1769, + 1956, + 3887, + 287, + 6, + 578, + 901, + 5066, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.1931878740542403, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5573714971542358, + "confidence": 0.907, + "words": [ + { + "text": "Bonsoir", + "start": 44.9, + "end": 45.42, + "confidence": 0.955 + }, + { + "text": "à", + "start": 45.42, + "end": 45.84, + "confidence": 0.542 + }, + { + "text": "toutes", + "start": 45.84, + "end": 46.06, + "confidence": 0.916 + }, + { + "text": "et", + "start": 46.06, + "end": 46.26, + "confidence": 0.988 + }, + { + "text": "tous,", + "start": 46.26, + "end": 46.5, + "confidence": 0.981 + }, + { + "text": "vous", + "start": 46.6, + "end": 46.74, + "confidence": 0.971 + }, + { + "text": "êtes", + "start": 46.74, + "end": 46.88, + "confidence": 0.995 + }, + { + "text": "sur", + "start": 46.88, + "end": 47.1, + "confidence": 0.989 + }, + { + "text": "BFM", + "start": 47.1, + "end": 47.52, + "confidence": 0.987 + }, + { + "text": "TV,", + "start": 47.52, + "end": 47.72, + "confidence": 0.853 + }, + { + "text": "nous", + "start": 47.84, + "end": 47.92, + "confidence": 0.83 + }, + { + "text": "sommes", + "start": 47.92, + "end": 48.14, + "confidence": 0.988 + }, + { + "text": "en", + "start": 48.14, + "end": 48.28, + "confidence": 0.987 + }, + { + "text": "direct,", + "start": 48.28, + "end": 48.52, + "confidence": 0.998 + }, + { + "text": "c'est", + "start": 48.7, + "end": 48.76, + "confidence": 0.939 + }, + { + "text": "bien", + "start": 48.76, + "end": 48.98, + "confidence": 0.971 + }, + { + "text": "sûr", + "start": 48.98, + "end": 49.22, + "confidence": 0.907 + }, + { + "text": "BFM", + "start": 49.22, + "end": 49.82, + "confidence": 0.95 + }, + { + "text": "story", + "start": 49.82, + "end": 50.04, + "confidence": 0.356 + }, + { + "text": "avec", + "start": 50.04, + "end": 50.26, + "confidence": 0.554 + }, + { + "text": "tout", + "start": 50.26, + "end": 50.42, + "confidence": 0.93 + }, + 
{ + "text": "ce", + "start": 50.42, + "end": 50.52, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 50.52, + "end": 50.58, + "confidence": 0.951 + }, + { + "text": "fait", + "start": 50.58, + "end": 50.72, + "confidence": 0.961 + }, + { + "text": "l'actualité.", + "start": 50.72, + "end": 51.38, + "confidence": 0.994 + } + ] + }, + { + "id": 3, + "seek": 3000, + "start": 51.96, + "end": 56.22, + "text": " Durant 60 minutes ce sont des gros plans, des analyses, des réactions que nous vous proposons.", + "tokens": [ + 51414, + 13710, + 394, + 4060, + 2077, + 1769, + 4900, + 730, + 18638, + 5482, + 11, + 730, + 37560, + 11, + 730, + 3960, + 12299, + 631, + 4666, + 2630, + 7532, + 892, + 13, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.1931878740542403, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5573714971542358, + "confidence": 0.948, + "words": [ + { + "text": "Durant", + "start": 51.96, + "end": 52.32, + "confidence": 0.935 + }, + { + "text": "60", + "start": 52.32, + "end": 52.76, + "confidence": 0.921 + }, + { + "text": "minutes", + "start": 52.76, + "end": 53.08, + "confidence": 0.912 + }, + { + "text": "ce", + "start": 53.08, + "end": 53.28, + "confidence": 0.754 + }, + { + "text": "sont", + "start": 53.28, + "end": 53.62, + "confidence": 0.979 + }, + { + "text": "des", + "start": 53.62, + "end": 53.84, + "confidence": 0.99 + }, + { + "text": "gros", + "start": 53.84, + "end": 54.06, + "confidence": 0.969 + }, + { + "text": "plans,", + "start": 54.06, + "end": 54.2, + "confidence": 0.958 + }, + { + "text": "des", + "start": 54.32, + "end": 54.44, + "confidence": 0.986 + }, + { + "text": "analyses,", + "start": 54.44, + "end": 54.68, + "confidence": 0.914 + }, + { + "text": "des", + "start": 54.78, + "end": 54.86, + "confidence": 0.995 + }, + { + "text": "réactions", + "start": 54.86, + "end": 55.22, + "confidence": 0.996 + }, + { + "text": "que", + "start": 55.22, + "end": 55.4, + "confidence": 0.882 + }, + { + "text": 
"nous", + "start": 55.4, + "end": 55.52, + "confidence": 0.948 + }, + { + "text": "vous", + "start": 55.52, + "end": 55.78, + "confidence": 0.99 + }, + { + "text": "proposons.", + "start": 55.78, + "end": 56.22, + "confidence": 0.991 + } + ] + }, + { + "id": 4, + "seek": 5600, + "start": 56.66, + "end": 64.23, + "text": " Comment Eric Verth peut-il encore soutenir la réforme des retraites alors qu'il est englué dans sa propre affaire, l'affaire Verth-Bettancourt ?", + "tokens": [ + 50364, + 16328, + 9336, + 4281, + 392, + 5977, + 12, + 388, + 10122, + 29350, + 268, + 347, + 635, + 3960, + 44562, + 730, + 49356, + 3324, + 11246, + 421, + 6, + 388, + 871, + 1741, + 2781, + 526, + 2680, + 601, + 35221, + 2096, + 9020, + 11, + 287, + 6, + 2518, + 9020, + 4281, + 392, + 12, + 33, + 3093, + 4463, + 33403, + 2506, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.1336895481232674, + "compression_ratio": 1.573667711598746, + "no_speech_prob": 0.42523330450057983, + "confidence": 0.854, + "words": [ + { + "text": "Comment", + "start": 56.66, + "end": 56.96, + "confidence": 0.944 + }, + { + "text": "Eric", + "start": 56.96, + "end": 57.2, + "confidence": 0.877 + }, + { + "text": "Verth", + "start": 57.2, + "end": 57.36, + "confidence": 0.368 + }, + { + "text": "peut-il", + "start": 57.36, + "end": 57.8, + "confidence": 0.947 + }, + { + "text": "encore", + "start": 57.8, + "end": 58.22, + "confidence": 0.954 + }, + { + "text": "soutenir", + "start": 58.22, + "end": 58.62, + "confidence": 0.997 + }, + { + "text": "la", + "start": 58.62, + "end": 58.76, + "confidence": 0.977 + }, + { + "text": "réforme", + "start": 58.76, + "end": 59.2, + "confidence": 0.996 + }, + { + "text": "des", + "start": 59.2, + "end": 59.42, + "confidence": 0.992 + }, + { + "text": "retraites", + "start": 59.42, + "end": 59.76, + "confidence": 0.99 + }, + { + "text": "alors", + "start": 59.76, + "end": 59.98, + "confidence": 0.494 + }, + { + "text": "qu'il", + "start": 59.98, + "end": 60.74, + 
"confidence": 0.96 + }, + { + "text": "est", + "start": 60.74, + "end": 60.9, + "confidence": 0.973 + }, + { + "text": "englué", + "start": 60.9, + "end": 61.7, + "confidence": 0.945 + }, + { + "text": "dans", + "start": 61.7, + "end": 61.96, + "confidence": 0.961 + }, + { + "text": "sa", + "start": 61.96, + "end": 62.44, + "confidence": 0.85 + }, + { + "text": "propre", + "start": 62.44, + "end": 62.7, + "confidence": 0.992 + }, + { + "text": "affaire,", + "start": 62.7, + "end": 62.94, + "confidence": 0.993 + }, + { + "text": "l'affaire", + "start": 63.02, + "end": 63.3, + "confidence": 0.91 + }, + { + "text": "Verth-Bettancourt ?", + "start": 63.3, + "end": 64.23, + "confidence": 0.71 + } + ] + }, + { + "id": 5, + "seek": 5600, + "start": 64.23, + "end": 67.18, + "text": " Question posée par les leaders de la CFDT et la CGT.", + "tokens": [ + 50764, + 14464, + 1366, + 3856, + 971, + 1512, + 3523, + 368, + 635, + 21792, + 35, + 51, + 1030, + 635, + 38007, + 51, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.1336895481232674, + "compression_ratio": 1.573667711598746, + "no_speech_prob": 0.42523330450057983, + "confidence": 0.894, + "words": [ + { + "text": "Question", + "start": 64.23, + "end": 64.34, + "confidence": 0.628 + }, + { + "text": "posée", + "start": 64.34, + "end": 64.74, + "confidence": 0.976 + }, + { + "text": "par", + "start": 64.74, + "end": 64.96, + "confidence": 0.988 + }, + { + "text": "les", + "start": 64.96, + "end": 65.12, + "confidence": 0.965 + }, + { + "text": "leaders", + "start": 65.12, + "end": 65.66, + "confidence": 0.977 + }, + { + "text": "de", + "start": 65.66, + "end": 65.92, + "confidence": 0.986 + }, + { + "text": "la", + "start": 65.92, + "end": 66.1, + "confidence": 0.967 + }, + { + "text": "CFDT", + "start": 66.1, + "end": 66.52, + "confidence": 0.994 + }, + { + "text": "et", + "start": 66.52, + "end": 66.6, + "confidence": 0.604 + }, + { + "text": "la", + "start": 66.6, + "end": 66.74, + "confidence": 0.605 + }, 
+ { + "text": "CGT.", + "start": 66.74, + "end": 67.18, + "confidence": 0.987 + } + ] + }, + { + "id": 6, + "seek": 5600, + "start": 67.5, + "end": 76.38, + "text": " Réponse de Nicolas Sarkozy, Eric Verth portera le débat sur les retraites, on en parle dans BFM story avec le numéro de la CFDT.", + "tokens": [ + 50914, + 41587, + 3739, + 368, + 38268, + 318, + 809, + 78, + 1229, + 11, + 9336, + 4281, + 392, + 1515, + 23833, + 476, + 2795, + 11980, + 1022, + 1512, + 49356, + 3324, + 11, + 322, + 465, + 18508, + 2680, + 363, + 37, + 44, + 1657, + 4163, + 476, + 49525, + 368, + 635, + 21792, + 35, + 51, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.1336895481232674, + "compression_ratio": 1.573667711598746, + "no_speech_prob": 0.42523330450057983, + "confidence": 0.968, + "words": [ + { + "text": "Réponse", + "start": 67.5, + "end": 67.96, + "confidence": 0.946 + }, + { + "text": "de", + "start": 67.96, + "end": 68.14, + "confidence": 0.962 + }, + { + "text": "Nicolas", + "start": 68.14, + "end": 68.38, + "confidence": 0.931 + }, + { + "text": "Sarkozy,", + "start": 68.38, + "end": 68.88, + "confidence": 0.987 + }, + { + "text": "Eric", + "start": 68.98, + "end": 69.24, + "confidence": 0.961 + }, + { + "text": "Verth", + "start": 69.24, + "end": 69.84, + "confidence": 0.986 + }, + { + "text": "portera", + "start": 69.84, + "end": 70.44, + "confidence": 0.966 + }, + { + "text": "le", + "start": 70.44, + "end": 71.06, + "confidence": 0.989 + }, + { + "text": "débat", + "start": 71.06, + "end": 71.46, + "confidence": 0.996 + }, + { + "text": "sur", + "start": 71.46, + "end": 71.7, + "confidence": 0.988 + }, + { + "text": "les", + "start": 71.7, + "end": 71.88, + "confidence": 0.997 + }, + { + "text": "retraites,", + "start": 71.88, + "end": 72.26, + "confidence": 0.997 + }, + { + "text": "on", + "start": 72.36, + "end": 72.5, + "confidence": 0.867 + }, + { + "text": "en", + "start": 72.5, + "end": 72.68, + "confidence": 0.951 + }, + { + "text": "parle", + 
"start": 72.68, + "end": 73.1, + "confidence": 0.996 + }, + { + "text": "dans", + "start": 73.1, + "end": 73.56, + "confidence": 0.981 + }, + { + "text": "BFM", + "start": 73.56, + "end": 74.46, + "confidence": 0.991 + }, + { + "text": "story", + "start": 74.46, + "end": 74.76, + "confidence": 0.761 + }, + { + "text": "avec", + "start": 74.76, + "end": 75.12, + "confidence": 0.847 + }, + { + "text": "le", + "start": 75.12, + "end": 75.32, + "confidence": 0.982 + }, + { + "text": "numéro", + "start": 75.32, + "end": 75.54, + "confidence": 0.997 + }, + { + "text": "de", + "start": 75.54, + "end": 75.76, + "confidence": 0.996 + }, + { + "text": "la", + "start": 75.76, + "end": 75.9, + "confidence": 0.987 + }, + { + "text": "CFDT.", + "start": 75.9, + "end": 76.38, + "confidence": 0.998 + } + ] + }, + { + "id": 7, + "seek": 5600, + "start": 76.8, + "end": 83.04, + "text": " Et puis il y a une bataille qui a démarré, celle entre Marine Le Pen et Bruno Gognich, la bataille de la succession de Jean-Marie Le Pen à la tête du Front National.", + "tokens": [ + 51364, + 3790, + 9093, + 1930, + 288, + 257, + 2251, + 272, + 3274, + 3409, + 1956, + 257, + 22761, + 2284, + 526, + 11, + 25722, + 3962, + 20415, + 1456, + 10571, + 1030, + 23046, + 460, + 2912, + 480, + 11, + 635, + 272, + 3274, + 3409, + 368, + 635, + 36624, + 368, + 13854, + 12, + 16639, + 414, + 1456, + 10571, + 1531, + 635, + 24661, + 1581, + 17348, + 4862, + 13, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.1336895481232674, + "compression_ratio": 1.573667711598746, + "no_speech_prob": 0.42523330450057983, + "confidence": 0.871, + "words": [ + { + "text": "Et", + "start": 76.8, + "end": 76.96, + "confidence": 0.883 + }, + { + "text": "puis", + "start": 76.96, + "end": 77.06, + "confidence": 0.952 + }, + { + "text": "il", + "start": 77.06, + "end": 77.18, + "confidence": 0.789 + }, + { + "text": "y", + "start": 77.18, + "end": 77.2, + "confidence": 0.995 + }, + { + "text": "a", + "start": 77.2, + "end": 
77.22, + "confidence": 0.986 + }, + { + "text": "une", + "start": 77.22, + "end": 77.28, + "confidence": 0.995 + }, + { + "text": "bataille", + "start": 77.28, + "end": 77.56, + "confidence": 0.996 + }, + { + "text": "qui", + "start": 77.56, + "end": 77.72, + "confidence": 0.997 + }, + { + "text": "a", + "start": 77.72, + "end": 77.76, + "confidence": 0.985 + }, + { + "text": "démarré,", + "start": 77.76, + "end": 78.16, + "confidence": 0.988 + }, + { + "text": "celle", + "start": 78.24, + "end": 78.4, + "confidence": 0.997 + }, + { + "text": "entre", + "start": 78.4, + "end": 78.64, + "confidence": 0.975 + }, + { + "text": "Marine", + "start": 78.64, + "end": 78.94, + "confidence": 0.987 + }, + { + "text": "Le", + "start": 78.94, + "end": 79.12, + "confidence": 0.986 + }, + { + "text": "Pen", + "start": 79.12, + "end": 79.18, + "confidence": 0.985 + }, + { + "text": "et", + "start": 79.18, + "end": 79.34, + "confidence": 0.997 + }, + { + "text": "Bruno", + "start": 79.34, + "end": 79.54, + "confidence": 0.986 + }, + { + "text": "Gognich,", + "start": 79.54, + "end": 80.02, + "confidence": 0.239 + }, + { + "text": "la", + "start": 80.2, + "end": 80.32, + "confidence": 0.707 + }, + { + "text": "bataille", + "start": 80.32, + "end": 80.58, + "confidence": 0.998 + }, + { + "text": "de", + "start": 80.58, + "end": 80.72, + "confidence": 0.976 + }, + { + "text": "la", + "start": 80.72, + "end": 80.84, + "confidence": 0.987 + }, + { + "text": "succession", + "start": 80.84, + "end": 81.22, + "confidence": 0.982 + }, + { + "text": "de", + "start": 81.22, + "end": 81.44, + "confidence": 0.985 + }, + { + "text": "Jean-Marie", + "start": 81.44, + "end": 81.72, + "confidence": 0.95 + }, + { + "text": "Le", + "start": 81.72, + "end": 81.9, + "confidence": 0.995 + }, + { + "text": "Pen", + "start": 81.9, + "end": 82.0, + "confidence": 0.999 + }, + { + "text": "à", + "start": 82.0, + "end": 82.16, + "confidence": 0.977 + }, + { + "text": "la", + "start": 82.16, + "end": 82.22, + 
"confidence": 0.995 + }, + { + "text": "tête", + "start": 82.22, + "end": 82.32, + "confidence": 0.925 + }, + { + "text": "du", + "start": 82.32, + "end": 82.46, + "confidence": 0.997 + }, + { + "text": "Front", + "start": 82.46, + "end": 82.66, + "confidence": 0.777 + }, + { + "text": "National.", + "start": 82.66, + "end": 83.04, + "confidence": 0.822 + } + ] + }, + { + "id": 8, + "seek": 8300, + "start": 83.34, + "end": 88.88, + "text": " La tournée de campagne de Marine Le Pen commence aujourd'hui dans le Var, Marine Le Pen sera en direct dans BFM story.", + "tokens": [ + 50364, + 2369, + 3512, + 77, + 3856, + 368, + 2255, + 13887, + 368, + 20415, + 1456, + 10571, + 18137, + 14023, + 6, + 10556, + 2680, + 476, + 14662, + 11, + 20415, + 1456, + 10571, + 15021, + 465, + 2047, + 2680, + 363, + 37, + 44, + 1657, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.11255872032859109, + "compression_ratio": 1.3591549295774648, + "no_speech_prob": 0.40272650122642517, + "confidence": 0.955, + "words": [ + { + "text": "La", + "start": 83.34, + "end": 83.68, + "confidence": 0.922 + }, + { + "text": "tournée", + "start": 83.68, + "end": 84.16, + "confidence": 0.994 + }, + { + "text": "de", + "start": 84.16, + "end": 84.48, + "confidence": 0.936 + }, + { + "text": "campagne", + "start": 84.48, + "end": 84.8, + "confidence": 0.981 + }, + { + "text": "de", + "start": 84.8, + "end": 84.98, + "confidence": 0.972 + }, + { + "text": "Marine", + "start": 84.98, + "end": 85.14, + "confidence": 0.997 + }, + { + "text": "Le", + "start": 85.14, + "end": 85.32, + "confidence": 0.997 + }, + { + "text": "Pen", + "start": 85.32, + "end": 85.44, + "confidence": 0.999 + }, + { + "text": "commence", + "start": 85.44, + "end": 85.8, + "confidence": 0.971 + }, + { + "text": "aujourd'hui", + "start": 85.8, + "end": 86.14, + "confidence": 0.988 + }, + { + "text": "dans", + "start": 86.14, + "end": 86.28, + "confidence": 0.979 + }, + { + "text": "le", + "start": 86.28, + "end": 86.46, + 
"confidence": 0.923 + }, + { + "text": "Var,", + "start": 86.46, + "end": 86.64, + "confidence": 0.534 + }, + { + "text": "Marine", + "start": 86.74, + "end": 86.96, + "confidence": 0.992 + }, + { + "text": "Le", + "start": 86.96, + "end": 87.12, + "confidence": 0.992 + }, + { + "text": "Pen", + "start": 87.12, + "end": 87.22, + "confidence": 0.999 + }, + { + "text": "sera", + "start": 87.22, + "end": 87.48, + "confidence": 0.989 + }, + { + "text": "en", + "start": 87.48, + "end": 87.76, + "confidence": 0.989 + }, + { + "text": "direct", + "start": 87.76, + "end": 88.04, + "confidence": 0.997 + }, + { + "text": "dans", + "start": 88.04, + "end": 88.34, + "confidence": 0.926 + }, + { + "text": "BFM", + "start": 88.34, + "end": 88.68, + "confidence": 0.996 + }, + { + "text": "story.", + "start": 88.68, + "end": 88.88, + "confidence": 0.831 + } + ] + }, + { + "id": 9, + "seek": 8300, + "start": 89.02, + "end": 92.28, + "text": " Restez avec nous Marine Le Pen dans moins de 3 minutes, à tout de suite.", + "tokens": [ + 50664, + 13094, + 4371, + 4163, + 4666, + 20415, + 1456, + 10571, + 2680, + 13099, + 368, + 805, + 2077, + 11, + 1531, + 3486, + 368, + 14205, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.11255872032859109, + "compression_ratio": 1.3591549295774648, + "no_speech_prob": 0.40272650122642517, + "confidence": 0.936, + "words": [ + { + "text": "Restez", + "start": 89.02, + "end": 89.4, + "confidence": 0.967 + }, + { + "text": "avec", + "start": 89.4, + "end": 89.7, + "confidence": 0.997 + }, + { + "text": "nous", + "start": 89.7, + "end": 89.94, + "confidence": 0.956 + }, + { + "text": "Marine", + "start": 89.94, + "end": 90.24, + "confidence": 0.911 + }, + { + "text": "Le", + "start": 90.24, + "end": 90.52, + "confidence": 0.998 + }, + { + "text": "Pen", + "start": 90.52, + "end": 90.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 90.68, + "end": 90.98, + "confidence": 0.843 + }, + { + "text": "moins", + "start": 90.98, + 
"end": 91.2, + "confidence": 0.992 + }, + { + "text": "de", + "start": 91.2, + "end": 91.34, + "confidence": 0.991 + }, + { + "text": "3", + "start": 91.34, + "end": 91.52, + "confidence": 0.598 + }, + { + "text": "minutes,", + "start": 91.52, + "end": 91.8, + "confidence": 0.985 + }, + { + "text": "à", + "start": 91.82, + "end": 91.98, + "confidence": 0.961 + }, + { + "text": "tout", + "start": 91.98, + "end": 92.1, + "confidence": 0.918 + }, + { + "text": "de", + "start": 92.1, + "end": 92.14, + "confidence": 0.999 + }, + { + "text": "suite.", + "start": 92.14, + "end": 92.28, + "confidence": 0.999 + } + ] + }, + { + "id": 10, + "seek": 9200, + "start": 92.28, + "end": 93.08, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.6870794296264648, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.79963219165802, + "confidence": 0.153, + "words": [ + { + "text": "...", + "start": 92.28, + "end": 93.08, + "confidence": 0.153 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/smartphone.mp3.words.json b/tests/expected/medium_fr/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..42afde27baf61388598dd94ed72c61d6acee02c8 --- /dev/null +++ b/tests/expected/medium_fr/smartphone.mp3.words.json @@ -0,0 +1,4899 @@ +{ + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions, mais la manière dont elles interagissent entre elles. Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces. L'écran tactile a été beaucoup très souvent mentionné. Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes. 
Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible. Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but. Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité. Mais ça, ça soulève une autre interrogation. Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit ? Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone ? Il n'y a pas d'équivalent en fait. Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant. Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendant de cet objet, d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet. Donc à objet inédit, rapport inédit. Et ce rapport, si j'en crois à Nicolas, serait caractérisé par un mélange de dépendance et de rejet. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment. Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre. On peut adorer sa bagnole, en avoir besoin pour plein de choses. Le soir, quand on va se coucher, on la laisse. On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes. On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. 
Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui, continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate. Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi. Donc, rapport inédit, d'accord. Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais ? Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux ? Les économistes parlent de dépendance du sentier. C'est l'idée qu'on met sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.4, + "end": 3.66, + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça.", + "tokens": [ + 50364, + 383, + 6, + 377, + 20090, + 1078, + 1769, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 408, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13, + 50539 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.935, + "words": [ + { + "text": "C'est", + "start": 0.4, + "end": 0.64, + "confidence": 0.961 + }, + { + "text": "évident", + "start": 0.64, + "end": 0.9, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 0.9, + "end": 1.0, + "confidence": 0.663 + }, + { + "text": "que", + "start": 1.0, + "end": 1.08, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.994 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.48, + "confidence": 0.91 + }, + { + "text": "mais", + "start": 1.7, + "end": 2.04, + "confidence": 0.979 + }, + { + "text": "je", + "start": 2.04, + "end": 2.26, + "confidence": 0.981 + }, + { + "text": "ne", + "start": 2.26, 
+ "end": 2.34, + "confidence": 0.835 + }, + { + "text": "me", + "start": 2.34, + "end": 2.36, + "confidence": 0.821 + }, + { + "text": "l'étais", + "start": 2.36, + "end": 2.58, + "confidence": 0.971 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.88, + "confidence": 0.989 + }, + { + "text": "formulé", + "start": 2.88, + "end": 3.26, + "confidence": 0.911 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.44, + "confidence": 0.993 + }, + { + "text": "ça.", + "start": 3.44, + "end": 3.66, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.16, + "end": 7.94, + "text": " Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions,", + "tokens": [ + 50549, + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 287, + 6, + 8476, + 449, + 2776, + 730, + 17290, + 3916, + 11, + 50756 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.931, + "words": [ + { + "text": "Ce", + "start": 4.16, + "end": 4.28, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 4.28, + "end": 4.36, + "confidence": 0.958 + }, + { + "text": "fait", + "start": 4.36, + "end": 4.5, + "confidence": 0.568 + }, + { + "text": "la", + "start": 4.5, + "end": 4.76, + "confidence": 0.971 + }, + { + "text": "force", + "start": 4.76, + "end": 5.02, + "confidence": 0.999 + }, + { + "text": "du", + "start": 5.02, + "end": 5.22, + "confidence": 0.996 + }, + { + "text": "smartphone,", + "start": 5.22, + "end": 5.7, + "confidence": 0.911 + }, + { + "text": "c'est", + "start": 6.04, + "end": 6.16, + "confidence": 0.88 + }, + { + "text": "pas", + "start": 6.16, + "end": 6.26, + "confidence": 0.992 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.54, + "confidence": 0.999 + }, + { + "text": "l'accumulation", + "start": 6.54, + "end": 7.4, + "confidence": 0.957 + }, + { + "text": 
"des", + "start": 7.4, + "end": 7.58, + "confidence": 0.983 + }, + { + "text": "fonctions,", + "start": 7.58, + "end": 7.94, + "confidence": 0.987 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.32, + "end": 10.88, + "text": " mais la manière dont elles interagissent entre elles.", + "tokens": [ + 50756, + 2420, + 635, + 22267, + 9400, + 23576, + 728, + 559, + 25450, + 3962, + 23576, + 13, + 50906 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.977, + "words": [ + { + "text": "mais", + "start": 8.32, + "end": 8.48, + "confidence": 0.992 + }, + { + "text": "la", + "start": 8.48, + "end": 8.7, + "confidence": 0.995 + }, + { + "text": "manière", + "start": 8.7, + "end": 8.94, + "confidence": 0.999 + }, + { + "text": "dont", + "start": 8.94, + "end": 9.08, + "confidence": 0.978 + }, + { + "text": "elles", + "start": 9.08, + "end": 9.48, + "confidence": 0.967 + }, + { + "text": "interagissent", + "start": 9.48, + "end": 10.38, + "confidence": 0.965 + }, + { + "text": "entre", + "start": 10.38, + "end": 10.7, + "confidence": 0.955 + }, + { + "text": "elles.", + "start": 10.7, + "end": 10.88, + "confidence": 0.989 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.96, + "end": 13.0, + "text": " Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant.", + "tokens": [ + 50906, + 8257, + 1956, + 6176, + 274, + 6, + 19400, + 1022, + 635, + 5052, + 11, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13, + 51006 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.906, + "words": [ + { + "text": "Ce", + "start": 10.96, + "end": 11.16, + "confidence": 0.608 + }, + { + "text": "qui", + "start": 11.16, + "end": 11.26, + "confidence": 0.764 + }, + { + "text": "dit", + "start": 11.26, + "end": 11.38, + "confidence": 0.983 + }, + { 
+ "text": "d'ailleurs", + "start": 11.38, + "end": 11.58, + "confidence": 0.985 + }, + { + "text": "sur", + "start": 11.58, + "end": 11.72, + "confidence": 0.48 + }, + { + "text": "la", + "start": 11.72, + "end": 11.82, + "confidence": 0.984 + }, + { + "text": "photo,", + "start": 11.82, + "end": 12.0, + "confidence": 0.994 + }, + { + "text": "c'est", + "start": 12.14, + "end": 12.2, + "confidence": 0.997 + }, + { + "text": "hyper", + "start": 12.2, + "end": 12.48, + "confidence": 0.993 + }, + { + "text": "convaincant.", + "start": 12.48, + "end": 13.0, + "confidence": 0.982 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.38, + "end": 16.04, + "text": " Alors évidemment, il faudrait ajouter les interfaces.", + "tokens": [ + 51006, + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 13, + 51166 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.913, + "words": [ + { + "text": "Alors", + "start": 13.38, + "end": 13.58, + "confidence": 0.585 + }, + { + "text": "évidemment,", + "start": 13.58, + "end": 13.86, + "confidence": 0.832 + }, + { + "text": "il", + "start": 14.26, + "end": 14.42, + "confidence": 0.953 + }, + { + "text": "faudrait", + "start": 14.42, + "end": 14.76, + "confidence": 0.996 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.2, + "confidence": 0.992 + }, + { + "text": "les", + "start": 15.2, + "end": 15.6, + "confidence": 0.985 + }, + { + "text": "interfaces.", + "start": 15.6, + "end": 16.04, + "confidence": 0.984 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.22, + "end": 19.36, + "text": " L'écran tactile a été beaucoup très souvent mentionné.", + "tokens": [ + 51166, + 441, + 6, + 9062, + 4257, + 47319, + 257, + 8862, + 8796, + 5732, + 20847, + 2152, + 15055, + 13, + 51331 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 
1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.924, + "words": [ + { + "text": "L'écran", + "start": 16.22, + "end": 16.78, + "confidence": 0.996 + }, + { + "text": "tactile", + "start": 16.78, + "end": 17.1, + "confidence": 0.986 + }, + { + "text": "a", + "start": 17.1, + "end": 17.3, + "confidence": 0.98 + }, + { + "text": "été", + "start": 17.3, + "end": 17.84, + "confidence": 0.975 + }, + { + "text": "beaucoup", + "start": 17.84, + "end": 18.28, + "confidence": 0.976 + }, + { + "text": "très", + "start": 18.28, + "end": 18.62, + "confidence": 0.448 + }, + { + "text": "souvent", + "start": 18.62, + "end": 18.9, + "confidence": 0.996 + }, + { + "text": "mentionné.", + "start": 18.9, + "end": 19.36, + "confidence": 0.978 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 20.02, + "end": 25.44, + "text": " Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes.", + "tokens": [ + 51331, + 6313, + 4428, + 11, + 1930, + 8487, + 1264, + 421, + 6, + 388, + 1740, + 642, + 6212, + 368, + 945, + 1567, + 17338, + 1512, + 358, + 1625, + 1512, + 4792, + 13923, + 2156, + 4666, + 6592, + 724, + 5714, + 1531, + 596, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13, + 51631 + ], + "temperature": 0.0, + "avg_logprob": -0.23621009675082782, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1557241678237915, + "confidence": 0.921, + "words": [ + { + "text": "Mais", + "start": 20.02, + "end": 20.26, + "confidence": 0.943 + }, + { + "text": "bon,", + "start": 20.26, + "end": 20.5, + "confidence": 0.666 + }, + { + "text": "il", + "start": 20.52, + "end": 20.62, + "confidence": 0.99 + }, + { + "text": "faut", + "start": 20.62, + "end": 20.7, + "confidence": 0.99 + }, + { + "text": "dire", + "start": 20.7, + "end": 20.82, + "confidence": 0.995 + }, + { + "text": "qu'il", + "start": 20.82, + "end": 21.04, + "confidence": 0.88 + }, + { + "text": "profite", + "start": 
21.04, + "end": 21.3, + "confidence": 0.995 + }, + { + "text": "aussi", + "start": 21.3, + "end": 21.7, + "confidence": 0.972 + }, + { + "text": "de", + "start": 21.7, + "end": 21.94, + "confidence": 0.97 + }, + { + "text": "20", + "start": 21.94, + "end": 22.16, + "confidence": 0.812 + }, + { + "text": "ans", + "start": 22.16, + "end": 22.3, + "confidence": 0.997 + }, + { + "text": "pendant", + "start": 22.3, + "end": 22.54, + "confidence": 0.723 + }, + { + "text": "lesquels", + "start": 22.54, + "end": 22.92, + "confidence": 0.98 + }, + { + "text": "les", + "start": 22.92, + "end": 23.14, + "confidence": 0.709 + }, + { + "text": "ordinateurs", + "start": 23.14, + "end": 23.58, + "confidence": 0.966 + }, + { + "text": "nous", + "start": 23.58, + "end": 23.74, + "confidence": 0.602 + }, + { + "text": "ont", + "start": 23.74, + "end": 23.88, + "confidence": 0.974 + }, + { + "text": "appris", + "start": 23.88, + "end": 24.12, + "confidence": 0.991 + }, + { + "text": "à", + "start": 24.12, + "end": 24.26, + "confidence": 0.829 + }, + { + "text": "cliquer", + "start": 24.26, + "end": 24.54, + "confidence": 0.989 + }, + { + "text": "sur", + "start": 24.54, + "end": 24.72, + "confidence": 0.984 + }, + { + "text": "des", + "start": 24.72, + "end": 24.92, + "confidence": 0.971 + }, + { + "text": "icônes.", + "start": 24.92, + "end": 25.44, + "confidence": 0.992 + } + ] + }, + { + "id": 7, + "seek": 2534, + "start": 25.54, + "end": 30.64, + "text": " Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible.", + "tokens": [ + 50364, + 318, + 9507, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1769, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.1078022932394957, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.19658996164798737, + "confidence": 0.968, + "words": [ + { + "text": "Sauf", + "start": 25.54, + "end": 
25.82, + "confidence": 0.974 + }, + { + "text": "que", + "start": 25.82, + "end": 26.3, + "confidence": 0.996 + }, + { + "text": "le", + "start": 26.3, + "end": 26.66, + "confidence": 0.672 + }, + { + "text": "smartphone", + "start": 26.66, + "end": 27.08, + "confidence": 0.995 + }, + { + "text": "ajoute", + "start": 27.08, + "end": 27.48, + "confidence": 0.99 + }, + { + "text": "le", + "start": 27.48, + "end": 27.66, + "confidence": 0.989 + }, + { + "text": "toucher,", + "start": 27.66, + "end": 28.06, + "confidence": 0.986 + }, + { + "text": "ce", + "start": 28.14, + "end": 28.16, + "confidence": 0.951 + }, + { + "text": "qui", + "start": 28.16, + "end": 28.3, + "confidence": 1.0 + }, + { + "text": "rend", + "start": 28.3, + "end": 28.54, + "confidence": 0.994 + }, + { + "text": "le", + "start": 28.54, + "end": 28.8, + "confidence": 0.992 + }, + { + "text": "contact", + "start": 28.8, + "end": 29.16, + "confidence": 0.999 + }, + { + "text": "plus", + "start": 29.16, + "end": 29.58, + "confidence": 0.983 + }, + { + "text": "direct,", + "start": 29.58, + "end": 30.0, + "confidence": 0.991 + }, + { + "text": "plus", + "start": 30.1, + "end": 30.24, + "confidence": 0.99 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.64, + "confidence": 0.997 + } + ] + }, + { + "id": 8, + "seek": 2534, + "start": 31.04, + "end": 37.82, + "text": " Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but.", + "tokens": [ + 50639, + 3790, + 9093, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 10095, + 602, + 84, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 1609, + 457, + 13, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.1078022932394957, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.19658996164798737, + "confidence": 0.893, + "words": [ + { + 
"text": "Et", + "start": 31.04, + "end": 31.24, + "confidence": 0.963 + }, + { + "text": "puis", + "start": 31.24, + "end": 31.34, + "confidence": 0.967 + }, + { + "text": "évidemment,", + "start": 31.34, + "end": 31.62, + "confidence": 0.868 + }, + { + "text": "il", + "start": 31.66, + "end": 31.72, + "confidence": 0.99 + }, + { + "text": "faudrait", + "start": 31.72, + "end": 31.94, + "confidence": 0.996 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.16, + "confidence": 0.866 + }, + { + "text": "aussi", + "start": 32.16, + "end": 32.34, + "confidence": 0.971 + }, + { + "text": "des", + "start": 32.34, + "end": 32.48, + "confidence": 0.993 + }, + { + "text": "applications", + "start": 32.48, + "end": 32.92, + "confidence": 0.993 + }, + { + "text": "qui", + "start": 32.92, + "end": 33.22, + "confidence": 0.562 + }, + { + "text": "permettent", + "start": 33.22, + "end": 33.76, + "confidence": 0.99 + }, + { + "text": "de", + "start": 33.76, + "end": 33.98, + "confidence": 0.872 + }, + { + "text": "contourner", + "start": 33.98, + "end": 34.42, + "confidence": 0.953 + }, + { + "text": "le", + "start": 34.42, + "end": 34.54, + "confidence": 0.75 + }, + { + "text": "côté", + "start": 34.54, + "end": 34.78, + "confidence": 0.985 + }, + { + "text": "touffu", + "start": 34.78, + "end": 35.32, + "confidence": 0.727 + }, + { + "text": "de", + "start": 35.32, + "end": 35.7, + "confidence": 0.881 + }, + { + "text": "la", + "start": 35.7, + "end": 35.82, + "confidence": 0.991 + }, + { + "text": "navigation", + "start": 35.82, + "end": 36.3, + "confidence": 0.992 + }, + { + "text": "web", + "start": 36.3, + "end": 36.58, + "confidence": 0.847 + }, + { + "text": "pour", + "start": 36.58, + "end": 36.76, + "confidence": 0.589 + }, + { + "text": "aller", + "start": 36.76, + "end": 37.16, + "confidence": 0.981 + }, + { + "text": "directement", + "start": 37.16, + "end": 37.54, + "confidence": 0.997 + }, + { + "text": "au", + "start": 37.54, + "end": 37.7, + "confidence": 
0.969 + }, + { + "text": "but.", + "start": 37.7, + "end": 37.82, + "confidence": 0.995 + } + ] + }, + { + "id": 9, + "seek": 2534, + "start": 37.9, + "end": 46.6, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 50989, + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 8603, + 14964, + 9400, + 38268, + 6176, + 421, + 6, + 388, + 871, + 6070, + 271, + 443, + 5199, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13, + 51439 + ], + "temperature": 0.0, + "avg_logprob": -0.1078022932394957, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.19658996164798737, + "confidence": 0.973, + "words": [ + { + "text": "Bref,", + "start": 37.9, + "end": 38.24, + "confidence": 0.984 + }, + { + "text": "tout", + "start": 38.88, + "end": 39.02, + "confidence": 0.7 + }, + { + "text": "ça,", + "start": 39.02, + "end": 39.4, + "confidence": 0.995 + }, + { + "text": "ce", + "start": 39.4, + "end": 39.72, + "confidence": 0.989 + }, + { + "text": "sont", + "start": 39.72, + "end": 39.92, + "confidence": 0.999 + }, + { + "text": "les", + "start": 39.92, + "end": 40.22, + "confidence": 0.992 + }, + { + "text": "conditions", + "start": 40.22, + "end": 40.7, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 40.7, + "end": 41.02, + "confidence": 0.996 + }, + { + "text": "permettent", + "start": 41.02, + "end": 41.44, + "confidence": 0.996 + }, + { + "text": "de", + "start": 41.44, + "end": 41.74, + "confidence": 0.997 + }, + { + "text": "créer", + "start": 41.74, + "end": 42.1, + "confidence": 0.998 + }, + { + "text": "cet", + "start": 42.1, + "end": 42.38, + "confidence": 0.997 + }, + { + "text": "objet", + "start": 42.38, + "end": 42.64, + "confidence": 0.994 + }, + { + "text": "dont", + "start": 42.64, + "end": 42.84, + 
"confidence": 0.79 + }, + { + "text": "Nicolas", + "start": 42.84, + "end": 43.24, + "confidence": 0.988 + }, + { + "text": "dit", + "start": 43.24, + "end": 43.52, + "confidence": 0.986 + }, + { + "text": "qu'il", + "start": 43.52, + "end": 43.74, + "confidence": 0.982 + }, + { + "text": "est", + "start": 43.74, + "end": 43.94, + "confidence": 0.99 + }, + { + "text": "vraisemblablement", + "start": 43.94, + "end": 44.86, + "confidence": 0.99 + }, + { + "text": "inédit", + "start": 44.86, + "end": 45.44, + "confidence": 0.979 + }, + { + "text": "dans", + "start": 45.44, + "end": 45.72, + "confidence": 0.967 + }, + { + "text": "l'histoire", + "start": 45.72, + "end": 46.02, + "confidence": 0.957 + }, + { + "text": "de", + "start": 46.02, + "end": 46.14, + "confidence": 0.999 + }, + { + "text": "l'humanité.", + "start": 46.14, + "end": 46.6, + "confidence": 0.992 + } + ] + }, + { + "id": 10, + "seek": 2534, + "start": 47.02, + "end": 48.78, + "text": " Mais ça, ça soulève une autre interrogation.", + "tokens": [ + 51439, + 6313, + 2788, + 11, + 2788, + 5133, + 31397, + 2251, + 15081, + 24871, + 399, + 13, + 51539 + ], + "temperature": 0.0, + "avg_logprob": -0.1078022932394957, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.19658996164798737, + "confidence": 0.969, + "words": [ + { + "text": "Mais", + "start": 47.02, + "end": 47.28, + "confidence": 0.841 + }, + { + "text": "ça,", + "start": 47.28, + "end": 47.48, + "confidence": 0.942 + }, + { + "text": "ça", + "start": 47.62, + "end": 47.64, + "confidence": 0.976 + }, + { + "text": "soulève", + "start": 47.64, + "end": 47.86, + "confidence": 0.993 + }, + { + "text": "une", + "start": 47.86, + "end": 48.02, + "confidence": 0.998 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.2, + "confidence": 0.999 + }, + { + "text": "interrogation.", + "start": 48.2, + "end": 48.78, + "confidence": 0.997 + } + ] + }, + { + "id": 11, + "seek": 4884, + "start": 49.36, + "end": 55.5, + "text": " Est-ce que 
le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit ?", + "tokens": [ + 50389, + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 8603, + 14964, + 12703, + 294, + 7811, + 270, + 13716, + 270, + 631, + 10349, + 18018, + 1531, + 8783, + 871, + 6212, + 517, + 18018, + 294, + 7811, + 270, + 2506, + 50689 + ], + "temperature": 0.0, + "avg_logprob": -0.08238351049502034, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.986, + "words": [ + { + "text": "Est-ce", + "start": 49.36, + "end": 49.64, + "confidence": 0.978 + }, + { + "text": "que", + "start": 49.64, + "end": 49.76, + "confidence": 0.991 + }, + { + "text": "le", + "start": 49.76, + "end": 49.82, + "confidence": 0.993 + }, + { + "text": "fait", + "start": 49.82, + "end": 50.0, + "confidence": 0.999 + }, + { + "text": "que", + "start": 50.0, + "end": 50.14, + "confidence": 0.991 + }, + { + "text": "cet", + "start": 50.14, + "end": 50.32, + "confidence": 0.989 + }, + { + "text": "objet", + "start": 50.32, + "end": 50.66, + "confidence": 0.997 + }, + { + "text": "soit", + "start": 50.66, + "end": 51.12, + "confidence": 0.995 + }, + { + "text": "inédit", + "start": 51.12, + "end": 51.78, + "confidence": 0.995 + }, + { + "text": "induit", + "start": 51.78, + "end": 52.32, + "confidence": 0.956 + }, + { + "text": "que", + "start": 52.32, + "end": 52.42, + "confidence": 0.984 + }, + { + "text": "notre", + "start": 52.42, + "end": 52.78, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 52.78, + "end": 53.28, + "confidence": 0.997 + }, + { + "text": "à", + "start": 53.28, + "end": 53.46, + "confidence": 0.979 + }, + { + "text": "lui", + "start": 53.46, + "end": 53.68, + "confidence": 0.999 + }, + { + "text": "est", + "start": 53.68, + "end": 54.14, + "confidence": 0.907 + }, + { + "text": "aussi", + "start": 54.14, + "end": 54.52, + "confidence": 0.996 + }, + { + "text": "un", + "start": 54.52, + "end": 54.72, + 
"confidence": 0.994 + }, + { + "text": "rapport", + "start": 54.72, + "end": 55.0, + "confidence": 0.997 + }, + { + "text": "inédit ?", + "start": 55.0, + "end": 55.5, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 4884, + "start": 55.76, + "end": 63.4, + "text": " Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone ?", + "tokens": [ + 50689, + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 13307, + 871, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 1111, + 25349, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506, + 51089 + ], + "temperature": 0.0, + "avg_logprob": -0.08238351049502034, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.96, + "words": [ + { + "text": "Je", + "start": 55.76, + "end": 55.9, + "confidence": 0.904 + }, + { + "text": "veux", + "start": 55.9, + "end": 56.0, + "confidence": 0.989 + }, + { + "text": "dire,", + "start": 56.0, + "end": 56.12, + "confidence": 0.997 + }, + { + "text": "est-ce", + "start": 56.22, + "end": 56.38, + "confidence": 0.99 + }, + { + "text": "que", + "start": 56.38, + "end": 56.5, + "confidence": 0.991 + }, + { + "text": "le", + "start": 56.5, + "end": 56.66, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 56.66, + "end": 56.86, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 56.86, + "end": 57.08, + "confidence": 0.985 + }, + { + "text": "a", + "start": 57.08, + "end": 57.16, + "confidence": 0.99 + }, + { + "text": "au", + "start": 57.16, + "end": 57.28, + "confidence": 0.966 + }, + { + "text": "smartphone", + "start": 57.28, + "end": 57.6, + "confidence": 0.995 + }, + { + "text": "est", + "start": 57.6, + "end": 57.86, + "confidence": 0.911 + }, + { + "text": "comparable", + "start": 57.86, + 
"end": 58.3, + "confidence": 0.996 + }, + { + "text": "à", + "start": 58.3, + "end": 58.5, + "confidence": 0.946 + }, + { + "text": "celui", + "start": 58.5, + "end": 58.66, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 58.66, + "end": 58.96, + "confidence": 0.989 + }, + { + "text": "entretenait", + "start": 58.96, + "end": 59.36, + "confidence": 0.926 + }, + { + "text": "à", + "start": 59.36, + "end": 59.48, + "confidence": 0.951 + }, + { + "text": "d'autres", + "start": 59.48, + "end": 59.7, + "confidence": 0.997 + }, + { + "text": "objets", + "start": 59.7, + "end": 59.98, + "confidence": 0.99 + }, + { + "text": "techniques", + "start": 59.98, + "end": 60.44, + "confidence": 0.984 + }, + { + "text": "comme", + "start": 60.44, + "end": 60.96, + "confidence": 0.496 + }, + { + "text": "la", + "start": 60.96, + "end": 61.52, + "confidence": 0.987 + }, + { + "text": "voiture", + "start": 61.52, + "end": 62.08, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 62.08, + "end": 62.52, + "confidence": 0.949 + }, + { + "text": "le", + "start": 62.52, + "end": 62.74, + "confidence": 0.998 + }, + { + "text": "téléphone ?", + "start": 62.74, + "end": 63.4, + "confidence": 0.999 + } + ] + }, + { + "id": 13, + "seek": 4884, + "start": 65.36, + "end": 66.62, + "text": " Il n'y a pas d'équivalent en fait.", + "tokens": [ + 51189, + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 465, + 3887, + 13, + 51239 + ], + "temperature": 0.0, + "avg_logprob": -0.08238351049502034, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.94, + "words": [ + { + "text": "Il", + "start": 65.36, + "end": 65.5, + "confidence": 0.85 + }, + { + "text": "n'y", + "start": 65.5, + "end": 65.54, + "confidence": 0.978 + }, + { + "text": "a", + "start": 65.54, + "end": 65.56, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.66, + "confidence": 0.999 + }, + { + "text": 
"d'équivalent", + "start": 65.66, + "end": 66.3, + "confidence": 0.995 + }, + { + "text": "en", + "start": 66.3, + "end": 66.44, + "confidence": 0.582 + }, + { + "text": "fait.", + "start": 66.44, + "end": 66.62, + "confidence": 0.997 + } + ] + }, + { + "id": 14, + "seek": 4884, + "start": 66.88, + "end": 71.24, + "text": " Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant.", + "tokens": [ + 51239, + 3790, + 5926, + 5550, + 7089, + 30236, + 368, + 11456, + 1375, + 526, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 11, + 269, + 6, + 377, + 7184, + 259, + 394, + 1030, + 7245, + 351, + 5798, + 13, + 51489 + ], + "temperature": 0.0, + "avg_logprob": -0.08238351049502034, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.957, + "words": [ + { + "text": "Et", + "start": 66.88, + "end": 67.02, + "confidence": 0.655 + }, + { + "text": "donc", + "start": 67.02, + "end": 67.12, + "confidence": 0.902 + }, + { + "text": "cette", + "start": 67.12, + "end": 67.3, + "confidence": 0.719 + }, + { + "text": "espèce", + "start": 67.3, + "end": 67.54, + "confidence": 0.996 + }, + { + "text": "de", + "start": 67.54, + "end": 67.76, + "confidence": 0.999 + }, + { + "text": "nouveauté", + "start": 67.76, + "end": 68.48, + "confidence": 0.981 + }, + { + "text": "dans", + "start": 68.48, + "end": 68.84, + "confidence": 0.982 + }, + { + "text": "la", + "start": 68.84, + "end": 68.96, + "confidence": 0.995 + }, + { + "text": "relation", + "start": 68.96, + "end": 69.24, + "confidence": 0.998 + }, + { + "text": "à", + "start": 69.24, + "end": 69.38, + "confidence": 0.997 + }, + { + "text": "l'objet,", + "start": 69.38, + "end": 70.0, + "confidence": 0.998 + }, + { + "text": "c'est", + "start": 70.28, + "end": 70.38, + "confidence": 0.977 + }, + { + "text": "fascinant", + "start": 70.38, + "end": 70.68, + "confidence": 0.98 + }, + { + "text": "et", + "start": 70.68, + "end": 70.76, + 
"confidence": 0.972 + }, + { + "text": "terrifiant.", + "start": 70.76, + "end": 71.24, + "confidence": 0.978 + } + ] + }, + { + "id": 15, + "seek": 7134, + "start": 71.62, + "end": 76.42, + "text": " Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendant de cet objet,", + "tokens": [ + 50389, + 20429, + 421, + 6, + 266, + 257, + 287, + 6, + 36107, + 11, + 5173, + 476, + 37313, + 1512, + 33643, + 25929, + 1030, + 1512, + 3328, + 11, + 274, + 6, + 9498, + 45768, + 394, + 368, + 8603, + 14964, + 11, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.11261525396573341, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181636437773705, + "confidence": 0.86, + "words": [ + { + "text": "Parce", + "start": 71.62, + "end": 71.9, + "confidence": 0.844 + }, + { + "text": "qu'on", + "start": 71.9, + "end": 72.4, + "confidence": 0.968 + }, + { + "text": "a", + "start": 72.4, + "end": 72.54, + "confidence": 0.979 + }, + { + "text": "l'impression,", + "start": 72.54, + "end": 72.9, + "confidence": 0.997 + }, + { + "text": "comme", + "start": 73.66, + "end": 73.9, + "confidence": 0.982 + }, + { + "text": "le", + "start": 73.9, + "end": 74.02, + "confidence": 0.984 + }, + { + "text": "disent", + "start": 74.02, + "end": 74.22, + "confidence": 0.997 + }, + { + "text": "les", + "start": 74.22, + "end": 74.52, + "confidence": 0.996 + }, + { + "text": "utilisateurs", + "start": 74.52, + "end": 74.84, + "confidence": 0.998 + }, + { + "text": "et", + "start": 74.84, + "end": 74.94, + "confidence": 0.843 + }, + { + "text": "les", + "start": 74.94, + "end": 75.02, + "confidence": 0.779 + }, + { + "text": "services,", + "start": 75.02, + "end": 75.18, + "confidence": 0.125 + }, + { + "text": "d'être", + "start": 75.22, + "end": 75.5, + "confidence": 0.934 + }, + { + "text": "dépendant", + "start": 75.5, + "end": 75.98, + "confidence": 0.709 + }, + { + "text": "de", + "start": 75.98, + "end": 76.08, + "confidence": 0.977 + }, + { 
+ "text": "cet", + "start": 76.08, + "end": 76.26, + "confidence": 0.998 + }, + { + "text": "objet,", + "start": 76.26, + "end": 76.42, + "confidence": 0.997 + } + ] + }, + { + "id": 16, + "seek": 7134, + "start": 76.42, + "end": 83.3, + "text": " d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet.", + "tokens": [ + 50639, + 274, + 6, + 471, + 43612, + 465, + 3887, + 2251, + 7089, + 30236, + 368, + 9721, + 11, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 368, + 287, + 6, + 335, + 781, + 374, + 1030, + 1956, + 669, + 18832, + 6212, + 1531, + 730, + 1254, + 279, + 368, + 319, + 7108, + 13, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.11261525396573341, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181636437773705, + "confidence": 0.923, + "words": [ + { + "text": "d'induire", + "start": 76.42, + "end": 77.1, + "confidence": 0.964 + }, + { + "text": "en", + "start": 77.1, + "end": 77.18, + "confidence": 0.721 + }, + { + "text": "fait", + "start": 77.18, + "end": 77.34, + "confidence": 0.997 + }, + { + "text": "une", + "start": 77.34, + "end": 77.5, + "confidence": 0.979 + }, + { + "text": "espèce", + "start": 77.5, + "end": 77.88, + "confidence": 0.997 + }, + { + "text": "de", + "start": 77.88, + "end": 78.28, + "confidence": 0.997 + }, + { + "text": "relation,", + "start": 78.28, + "end": 78.54, + "confidence": 0.42 + }, + { + "text": "de", + "start": 78.66, + "end": 78.96, + "confidence": 0.992 + }, + { + "text": "médiation", + "start": 78.96, + "end": 79.52, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 79.52, + "end": 79.76, + "confidence": 0.954 + }, + { + "text": "le", + "start": 79.76, + "end": 80.02, + "confidence": 0.998 + }, + { + "text": "monde", + "start": 80.02, + "end": 80.3, + "confidence": 0.996 + }, + { + "text": "qui", + "start": 80.3, + "end": 81.24, + "confidence": 0.664 + }, + { + "text": "rend", + 
"start": 81.24, + "end": 81.66, + "confidence": 0.968 + }, + { + "text": "de", + "start": 81.66, + "end": 81.74, + "confidence": 0.688 + }, + { + "text": "l'ampleur", + "start": 81.74, + "end": 82.02, + "confidence": 0.995 + }, + { + "text": "et", + "start": 82.02, + "end": 82.1, + "confidence": 0.953 + }, + { + "text": "qui", + "start": 82.1, + "end": 82.2, + "confidence": 0.987 + }, + { + "text": "amène", + "start": 82.2, + "end": 82.38, + "confidence": 0.971 + }, + { + "text": "aussi", + "start": 82.38, + "end": 82.54, + "confidence": 0.947 + }, + { + "text": "à", + "start": 82.54, + "end": 82.62, + "confidence": 0.959 + }, + { + "text": "des", + "start": 82.62, + "end": 82.7, + "confidence": 0.992 + }, + { + "text": "formes", + "start": 82.7, + "end": 82.88, + "confidence": 0.995 + }, + { + "text": "de", + "start": 82.88, + "end": 83.04, + "confidence": 0.998 + }, + { + "text": "rejet.", + "start": 83.04, + "end": 83.3, + "confidence": 0.802 + } + ] + }, + { + "id": 17, + "seek": 7134, + "start": 83.92, + "end": 87.7, + "text": " Donc à objet inédit, rapport inédit.", + "tokens": [ + 50989, + 7477, + 1531, + 14964, + 294, + 7811, + 270, + 11, + 18018, + 294, + 7811, + 270, + 13, + 51189 + ], + "temperature": 0.0, + "avg_logprob": -0.11261525396573341, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181636437773705, + "confidence": 0.901, + "words": [ + { + "text": "Donc", + "start": 83.92, + "end": 84.46, + "confidence": 0.984 + }, + { + "text": "à", + "start": 84.46, + "end": 84.98, + "confidence": 0.481 + }, + { + "text": "objet", + "start": 84.98, + "end": 85.46, + "confidence": 0.771 + }, + { + "text": "inédit,", + "start": 85.46, + "end": 86.2, + "confidence": 0.994 + }, + { + "text": "rapport", + "start": 86.54, + "end": 86.9, + "confidence": 0.986 + }, + { + "text": "inédit.", + "start": 86.9, + "end": 87.7, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 7134, + "start": 88.02, + "end": 94.92, + "text": " Et ce rapport, 
si j'en crois à Nicolas, serait caractérisé par un mélange de dépendance et de rejet.", + "tokens": [ + 51189, + 3790, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 21724, + 1531, + 38268, + 11, + 23139, + 1032, + 578, + 4198, + 22118, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 1030, + 368, + 319, + 7108, + 13, + 51539 + ], + "temperature": 0.0, + "avg_logprob": -0.11261525396573341, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181636437773705, + "confidence": 0.974, + "words": [ + { + "text": "Et", + "start": 88.02, + "end": 88.62, + "confidence": 0.989 + }, + { + "text": "ce", + "start": 88.62, + "end": 88.9, + "confidence": 0.985 + }, + { + "text": "rapport,", + "start": 88.9, + "end": 89.32, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.38, + "end": 89.56, + "confidence": 0.999 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.78, + "confidence": 0.997 + }, + { + "text": "crois", + "start": 89.78, + "end": 89.88, + "confidence": 0.988 + }, + { + "text": "à", + "start": 89.88, + "end": 90.06, + "confidence": 0.72 + }, + { + "text": "Nicolas,", + "start": 90.06, + "end": 90.24, + "confidence": 0.997 + }, + { + "text": "serait", + "start": 90.7, + "end": 91.0, + "confidence": 0.904 + }, + { + "text": "caractérisé", + "start": 91.0, + "end": 91.8, + "confidence": 0.994 + }, + { + "text": "par", + "start": 91.8, + "end": 92.22, + "confidence": 0.995 + }, + { + "text": "un", + "start": 92.22, + "end": 92.52, + "confidence": 0.996 + }, + { + "text": "mélange", + "start": 92.52, + "end": 93.04, + "confidence": 0.999 + }, + { + "text": "de", + "start": 93.04, + "end": 93.46, + "confidence": 0.998 + }, + { + "text": "dépendance", + "start": 93.46, + "end": 94.12, + "confidence": 0.937 + }, + { + "text": "et", + "start": 94.12, + "end": 94.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 94.54, + "end": 94.72, + "confidence": 0.999 + }, + { + "text": "rejet.", + "start": 94.72, + "end": 94.92, + 
"confidence": 0.988 + } + ] + }, + { + "id": 19, + "seek": 9484, + "start": 95.74, + "end": 102.82, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies", + "tokens": [ + 50389, + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 1111, + 25349, + 7512, + 1030, + 368, + 9580, + 8969, + 313, + 2680, + 3269, + 371, + 530, + 50739 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.932, + "words": [ + { + "text": "Bon,", + "start": 95.74, + "end": 96.0, + "confidence": 0.753 + }, + { + "text": "en", + "start": 96.34, + "end": 96.54, + "confidence": 0.992 + }, + { + "text": "vrai,", + "start": 96.54, + "end": 96.86, + "confidence": 0.991 + }, + { + "text": "il", + "start": 97.06, + "end": 97.18, + "confidence": 0.992 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.58, + "confidence": 0.996 + }, + { + "text": "remonter", + "start": 97.58, + "end": 98.06, + "confidence": 0.995 + }, + { + "text": "très", + "start": 98.06, + "end": 98.58, + "confidence": 0.995 + }, + { + "text": "très", + "start": 98.58, + "end": 98.84, + "confidence": 0.759 + }, + { + "text": "finement", + "start": 98.84, + "end": 99.38, + "confidence": 0.823 + }, + { + "text": "toute", + "start": 99.38, + "end": 99.7, + "confidence": 0.937 + }, + { + "text": "l'histoire", + "start": 99.7, + "end": 100.08, + "confidence": 0.996 + }, + { + "text": "des", + "start": 100.08, + "end": 100.26, + "confidence": 0.991 + }, + { + "text": "objets", + "start": 100.26, + "end": 100.52, + "confidence": 0.998 + }, + { + "text": "techniques", + "start": 100.52, + "end": 100.94, + "confidence": 0.982 + }, + { + "text": "et", + "start": 100.94, + "end": 101.54, + "confidence": 0.512 + }, + { + "text": "de", + 
"start": 101.54, + "end": 101.7, + "confidence": 0.984 + }, + { + "text": "leur", + "start": 101.7, + "end": 101.84, + "confidence": 0.82 + }, + { + "text": "insertion", + "start": 101.84, + "end": 102.34, + "confidence": 0.994 + }, + { + "text": "dans", + "start": 102.34, + "end": 102.5, + "confidence": 0.969 + }, + { + "text": "nos", + "start": 102.5, + "end": 102.66, + "confidence": 0.998 + }, + { + "text": "vies", + "start": 102.66, + "end": 102.82, + "confidence": 0.998 + } + ] + }, + { + "id": 20, + "seek": 9484, + "start": 102.86, + "end": 105.72, + "text": " pour déterminer si ce rapport est totalement inédit.", + "tokens": [ + 50739, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 294, + 7811, + 270, + 13, + 50889 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.995, + "words": [ + { + "text": "pour", + "start": 102.86, + "end": 103.1, + "confidence": 0.989 + }, + { + "text": "déterminer", + "start": 103.1, + "end": 103.64, + "confidence": 0.995 + }, + { + "text": "si", + "start": 103.64, + "end": 103.76, + "confidence": 0.986 + }, + { + "text": "ce", + "start": 103.76, + "end": 103.94, + "confidence": 0.991 + }, + { + "text": "rapport", + "start": 103.94, + "end": 104.26, + "confidence": 0.998 + }, + { + "text": "est", + "start": 104.26, + "end": 104.84, + "confidence": 0.997 + }, + { + "text": "totalement", + "start": 104.84, + "end": 105.3, + "confidence": 0.998 + }, + { + "text": "inédit.", + "start": 105.3, + "end": 105.72, + "confidence": 0.998 + } + ] + }, + { + "id": 21, + "seek": 9484, + "start": 106.14, + "end": 109.32, + "text": " Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment.", + "tokens": [ + 50889, + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 408, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13, + 51089 + ], + "temperature": 0.0, + 
"avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.931, + "words": [ + { + "text": "Mais", + "start": 106.14, + "end": 106.4, + "confidence": 0.975 + }, + { + "text": "j'ai", + "start": 106.4, + "end": 106.92, + "confidence": 0.957 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.38, + "confidence": 0.999 + }, + { + "text": "comme", + "start": 107.38, + "end": 107.58, + "confidence": 0.513 + }, + { + "text": "ça", + "start": 107.58, + "end": 107.82, + "confidence": 0.969 + }, + { + "text": "que", + "start": 107.82, + "end": 108.14, + "confidence": 0.974 + }, + { + "text": "Nicolas", + "start": 108.14, + "end": 108.48, + "confidence": 0.983 + }, + { + "text": "ne", + "start": 108.48, + "end": 108.6, + "confidence": 0.713 + }, + { + "text": "se", + "start": 108.6, + "end": 108.72, + "confidence": 0.992 + }, + { + "text": "trompe", + "start": 108.72, + "end": 108.88, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 108.88, + "end": 109.08, + "confidence": 0.999 + }, + { + "text": "vraiment.", + "start": 109.08, + "end": 109.32, + "confidence": 0.989 + } + ] + }, + { + "id": 22, + "seek": 9484, + "start": 109.94, + "end": 115.06, + "text": " Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 51089, + 8732, + 34081, + 631, + 1506, + 262, + 6000, + 11, + 1930, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13, + 51389 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.964, + "words": [ + { + "text": "Pour", + "start": 109.94, + "end": 110.1, + "confidence": 0.995 + }, + { + "text": "autant", + "start": 110.1, + "end": 110.24, + "confidence": 1.0 + }, + { + "text": "que", + "start": 110.24, + "end": 110.38, + 
"confidence": 0.984 + }, + { + "text": "je", + "start": 110.38, + "end": 110.52, + "confidence": 0.998 + }, + { + "text": "sache,", + "start": 110.52, + "end": 110.84, + "confidence": 0.953 + }, + { + "text": "il", + "start": 111.08, + "end": 111.16, + "confidence": 0.994 + }, + { + "text": "y", + "start": 111.16, + "end": 111.3, + "confidence": 0.994 + }, + { + "text": "a", + "start": 111.3, + "end": 111.32, + "confidence": 0.993 + }, + { + "text": "eu", + "start": 111.32, + "end": 111.62, + "confidence": 0.998 + }, + { + "text": "plein", + "start": 111.62, + "end": 111.9, + "confidence": 0.966 + }, + { + "text": "de", + "start": 111.9, + "end": 112.14, + "confidence": 0.997 + }, + { + "text": "discussions", + "start": 112.14, + "end": 112.66, + "confidence": 0.83 + }, + { + "text": "autour", + "start": 112.66, + "end": 113.02, + "confidence": 0.995 + }, + { + "text": "de", + "start": 113.02, + "end": 113.38, + "confidence": 0.996 + }, + { + "text": "la", + "start": 113.38, + "end": 113.52, + "confidence": 0.998 + }, + { + "text": "voiture", + "start": 113.52, + "end": 113.88, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 113.88, + "end": 114.06, + "confidence": 0.69 + }, + { + "text": "même", + "start": 114.06, + "end": 114.34, + "confidence": 0.995 + }, + { + "text": "du", + "start": 114.34, + "end": 114.64, + "confidence": 0.992 + }, + { + "text": "téléphone.", + "start": 114.64, + "end": 115.06, + "confidence": 0.999 + } + ] + }, + { + "id": 23, + "seek": 9484, + "start": 115.48, + "end": 117.7, + "text": " Mais la dépendance n'était pas du même ordre.", + "tokens": [ + 51389, + 6313, + 635, + 45768, + 719, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 51489 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.992, + "words": [ + { + "text": "Mais", + "start": 115.48, + "end": 115.78, + "confidence": 0.993 + }, + { 
+ "text": "la", + "start": 115.78, + "end": 116.04, + "confidence": 0.944 + }, + { + "text": "dépendance", + "start": 116.04, + "end": 116.42, + "confidence": 0.996 + }, + { + "text": "n'était", + "start": 116.42, + "end": 116.7, + "confidence": 0.994 + }, + { + "text": "pas", + "start": 116.7, + "end": 117.0, + "confidence": 0.998 + }, + { + "text": "du", + "start": 117.0, + "end": 117.2, + "confidence": 0.996 + }, + { + "text": "même", + "start": 117.2, + "end": 117.44, + "confidence": 0.998 + }, + { + "text": "ordre.", + "start": 117.44, + "end": 117.7, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 9484, + "start": 117.72, + "end": 119.78, + "text": " Donc le rejet non plus n'était pas du même ordre.", + "tokens": [ + 51489, + 7477, + 476, + 319, + 7108, + 2107, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 51589 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.978, + "words": [ + { + "text": "Donc", + "start": 117.72, + "end": 118.02, + "confidence": 0.959 + }, + { + "text": "le", + "start": 118.02, + "end": 118.4, + "confidence": 0.815 + }, + { + "text": "rejet", + "start": 118.4, + "end": 118.64, + "confidence": 0.999 + }, + { + "text": "non", + "start": 118.64, + "end": 118.82, + "confidence": 0.982 + }, + { + "text": "plus", + "start": 118.82, + "end": 118.94, + "confidence": 0.994 + }, + { + "text": "n'était", + "start": 118.94, + "end": 119.18, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 119.18, + "end": 119.3, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.3, + "end": 119.42, + "confidence": 0.995 + }, + { + "text": "même", + "start": 119.42, + "end": 119.58, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 119.58, + "end": 119.78, + "confidence": 0.999 + } + ] + }, + { + "id": 25, + "seek": 9484, + "start": 120.04, + "end": 123.1, + "text": " On peut 
adorer sa bagnole, en avoir besoin pour plein de choses.", + "tokens": [ + 51589, + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 1771, + 306, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13, + 51789 + ], + "temperature": 0.0, + "avg_logprob": -0.07804971811722736, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434893175959587, + "confidence": 0.971, + "words": [ + { + "text": "On", + "start": 120.04, + "end": 120.16, + "confidence": 0.757 + }, + { + "text": "peut", + "start": 120.16, + "end": 120.36, + "confidence": 0.997 + }, + { + "text": "adorer", + "start": 120.36, + "end": 120.68, + "confidence": 0.991 + }, + { + "text": "sa", + "start": 120.68, + "end": 120.88, + "confidence": 0.98 + }, + { + "text": "bagnole,", + "start": 120.88, + "end": 121.34, + "confidence": 0.985 + }, + { + "text": "en", + "start": 121.42, + "end": 121.56, + "confidence": 0.971 + }, + { + "text": "avoir", + "start": 121.56, + "end": 121.8, + "confidence": 0.997 + }, + { + "text": "besoin", + "start": 121.8, + "end": 122.12, + "confidence": 0.999 + }, + { + "text": "pour", + "start": 122.12, + "end": 122.46, + "confidence": 0.989 + }, + { + "text": "plein", + "start": 122.46, + "end": 122.7, + "confidence": 0.987 + }, + { + "text": "de", + "start": 122.7, + "end": 122.78, + "confidence": 0.997 + }, + { + "text": "choses.", + "start": 122.78, + "end": 123.1, + "confidence": 0.99 + } + ] + }, + { + "id": 26, + "seek": 12334, + "start": 123.38, + "end": 126.44, + "text": " Le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 50389, + 1456, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13, + 50539 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.941, + "words": [ + { + "text": "Le", + "start": 123.38, + "end": 123.96, + "confidence": 0.646 + }, + { + "text": "soir,", + 
"start": 123.96, + "end": 124.54, + "confidence": 0.998 + }, + { + "text": "quand", + "start": 124.74, + "end": 124.94, + "confidence": 0.982 + }, + { + "text": "on", + "start": 124.94, + "end": 125.06, + "confidence": 0.996 + }, + { + "text": "va", + "start": 125.06, + "end": 125.16, + "confidence": 0.989 + }, + { + "text": "se", + "start": 125.16, + "end": 125.26, + "confidence": 0.988 + }, + { + "text": "coucher,", + "start": 125.26, + "end": 125.7, + "confidence": 0.99 + }, + { + "text": "on", + "start": 125.94, + "end": 126.04, + "confidence": 0.992 + }, + { + "text": "la", + "start": 126.04, + "end": 126.18, + "confidence": 0.852 + }, + { + "text": "laisse.", + "start": 126.18, + "end": 126.44, + "confidence": 0.999 + } + ] + }, + { + "id": 27, + "seek": 12334, + "start": 127.0, + "end": 130.3, + "text": " On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes.", + "tokens": [ + 50539, + 1282, + 408, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 6932, + 322, + 871, + 1609, + 7997, + 11, + 322, + 408, + 287, + 6, + 443, + 76, + 18832, + 1736, + 1609, + 13228, + 1521, + 279, + 13, + 50739 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.918, + "words": [ + { + "text": "On", + "start": 127.0, + "end": 127.34, + "confidence": 0.967 + }, + { + "text": "ne", + "start": 127.34, + "end": 127.46, + "confidence": 0.799 + }, + { + "text": "l'a", + "start": 127.46, + "end": 127.56, + "confidence": 0.929 + }, + { + "text": "pas", + "start": 127.56, + "end": 127.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 127.68, + "end": 127.86, + "confidence": 0.992 + }, + { + "text": "la", + "start": 127.86, + "end": 128.08, + "confidence": 0.996 + }, + { + "text": "main", + "start": 128.08, + "end": 128.26, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 128.26, + "end": 128.48, + "confidence": 0.936 + }, + 
{ + "text": "on", + "start": 128.48, + "end": 128.6, + "confidence": 0.993 + }, + { + "text": "est", + "start": 128.6, + "end": 128.74, + "confidence": 0.992 + }, + { + "text": "au", + "start": 128.74, + "end": 128.94, + "confidence": 0.976 + }, + { + "text": "lit,", + "start": 128.94, + "end": 129.1, + "confidence": 0.999 + }, + { + "text": "on", + "start": 129.16, + "end": 129.28, + "confidence": 0.432 + }, + { + "text": "ne", + "start": 129.28, + "end": 129.3, + "confidence": 0.948 + }, + { + "text": "l'emmène", + "start": 129.3, + "end": 129.58, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 129.58, + "end": 129.7, + "confidence": 0.997 + }, + { + "text": "au", + "start": 129.7, + "end": 129.88, + "confidence": 0.72 + }, + { + "text": "chiottes.", + "start": 129.88, + "end": 130.3, + "confidence": 0.886 + } + ] + }, + { + "id": 28, + "seek": 12334, + "start": 130.82, + "end": 136.88, + "text": " On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain.", + "tokens": [ + 50739, + 1282, + 45913, + 7418, + 45045, + 15797, + 971, + 1872, + 275, + 2851, + 1398, + 1956, + 8073, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251, + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13, + 51039 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.925, + "words": [ + { + "text": "On", + "start": 130.82, + "end": 131.06, + "confidence": 0.996 + }, + { + "text": "pouvait", + "start": 131.06, + "end": 131.26, + "confidence": 0.988 + }, + { + "text": "être", + "start": 131.26, + "end": 131.58, + "confidence": 0.995 + }, + { + "text": "énervé", + "start": 131.58, + "end": 132.22, + "confidence": 0.896 + }, + { + "text": "par", + "start": 132.22, + "end": 132.46, + "confidence": 0.992 + }, + { + "text": "son", + "start": 132.46, + "end": 
132.72, + "confidence": 0.998 + }, + { + "text": "môme", + "start": 132.72, + "end": 133.08, + "confidence": 0.758 + }, + { + "text": "qui", + "start": 133.08, + "end": 133.34, + "confidence": 0.917 + }, + { + "text": "occupait", + "start": 133.34, + "end": 133.74, + "confidence": 0.992 + }, + { + "text": "la", + "start": 133.74, + "end": 133.86, + "confidence": 0.987 + }, + { + "text": "ligne", + "start": 133.86, + "end": 134.06, + "confidence": 0.999 + }, + { + "text": "de", + "start": 134.06, + "end": 134.22, + "confidence": 0.995 + }, + { + "text": "téléphone", + "start": 134.22, + "end": 134.6, + "confidence": 0.992 + }, + { + "text": "pendant", + "start": 134.6, + "end": 134.92, + "confidence": 0.71 + }, + { + "text": "une", + "start": 134.92, + "end": 135.16, + "confidence": 0.783 + }, + { + "text": "heure", + "start": 135.16, + "end": 135.34, + "confidence": 0.995 + }, + { + "text": "chaque", + "start": 135.34, + "end": 135.58, + "confidence": 0.982 + }, + { + "text": "soir", + "start": 135.58, + "end": 135.8, + "confidence": 0.995 + }, + { + "text": "pour", + "start": 135.8, + "end": 135.98, + "confidence": 0.66 + }, + { + "text": "discuter", + "start": 135.98, + "end": 136.3, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 136.3, + "end": 136.5, + "confidence": 0.995 + }, + { + "text": "un", + "start": 136.5, + "end": 136.66, + "confidence": 0.997 + }, + { + "text": "copain.", + "start": 136.66, + "end": 136.88, + "confidence": 0.996 + } + ] + }, + { + "id": 29, + "seek": 12334, + "start": 137.26, + "end": 141.86, + "text": " Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui,", + "tokens": [ + 51039, + 6313, + 2788, + 408, + 725, + 15750, + 35235, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 1769, + 5698, + 275, + 2851, + 1398, + 14023, + 6, + 10556, + 11, + 51289 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 
1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.956, + "words": [ + { + "text": "Mais", + "start": 137.26, + "end": 137.5, + "confidence": 0.989 + }, + { + "text": "ça", + "start": 137.5, + "end": 137.68, + "confidence": 0.927 + }, + { + "text": "ne", + "start": 137.68, + "end": 137.88, + "confidence": 0.999 + }, + { + "text": "ressemblait", + "start": 137.88, + "end": 138.42, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 138.42, + "end": 138.78, + "confidence": 0.995 + }, + { + "text": "à", + "start": 138.78, + "end": 138.9, + "confidence": 0.989 + }, + { + "text": "ce", + "start": 138.9, + "end": 138.98, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 138.98, + "end": 139.12, + "confidence": 0.976 + }, + { + "text": "peut", + "start": 139.12, + "end": 139.48, + "confidence": 0.979 + }, + { + "text": "ressentir", + "start": 139.48, + "end": 140.1, + "confidence": 0.996 + }, + { + "text": "à", + "start": 140.1, + "end": 140.32, + "confidence": 0.575 + }, + { + "text": "voir", + "start": 140.32, + "end": 140.46, + "confidence": 0.731 + }, + { + "text": "ce", + "start": 140.46, + "end": 140.7, + "confidence": 0.985 + }, + { + "text": "même", + "start": 140.7, + "end": 140.94, + "confidence": 0.982 + }, + { + "text": "môme", + "start": 140.94, + "end": 141.28, + "confidence": 0.997 + }, + { + "text": "aujourd'hui,", + "start": 141.28, + "end": 141.86, + "confidence": 0.994 + } + ] + }, + { + "id": 30, + "seek": 12334, + "start": 142.16, + "end": 144.22, + "text": " continuellement avec son smartphone dans la main,", + "tokens": [ + 51289, + 2354, + 285, + 1712, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.961, + "words": [ + { + "text": "continuellement", + "start": 142.16, + "end": 142.94, + "confidence": 0.963 + }, + { + 
"text": "avec", + "start": 142.94, + "end": 143.2, + "confidence": 0.844 + }, + { + "text": "son", + "start": 143.2, + "end": 143.42, + "confidence": 0.994 + }, + { + "text": "smartphone", + "start": 143.42, + "end": 143.76, + "confidence": 0.972 + }, + { + "text": "dans", + "start": 143.76, + "end": 143.92, + "confidence": 0.964 + }, + { + "text": "la", + "start": 143.92, + "end": 144.02, + "confidence": 0.997 + }, + { + "text": "main,", + "start": 144.02, + "end": 144.22, + "confidence": 0.997 + } + ] + }, + { + "id": 31, + "seek": 12334, + "start": 144.34, + "end": 148.8, + "text": " comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate.", + "tokens": [ + 51414, + 5173, + 1511, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 15165, + 49523, + 454, + 391, + 716, + 11, + 5173, + 1511, + 476, + 48835, + 6759, + 439, + 1001, + 22284, + 7517, + 1193, + 601, + 6599, + 3397, + 526, + 4504, + 473, + 13, + 51639 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.973, + "words": [ + { + "text": "comme", + "start": 144.34, + "end": 144.56, + "confidence": 0.985 + }, + { + "text": "si", + "start": 144.56, + "end": 144.66, + "confidence": 0.972 + }, + { + "text": "c'était", + "start": 144.66, + "end": 144.86, + "confidence": 0.987 + }, + { + "text": "une", + "start": 144.86, + "end": 144.98, + "confidence": 0.983 + }, + { + "text": "sorte", + "start": 144.98, + "end": 145.14, + "confidence": 0.992 + }, + { + "text": "de", + "start": 145.14, + "end": 145.34, + "confidence": 0.989 + }, + { + "text": "pacemaker", + "start": 145.34, + "end": 145.82, + "confidence": 0.932 + }, + { + "text": "externe,", + "start": 145.82, + "end": 146.4, + "confidence": 0.99 + }, + { + "text": "comme", + "start": 146.44, + "end": 146.6, + "confidence": 0.724 + }, + { + "text": "si", + "start": 146.6, + "end": 146.74, + "confidence": 
0.993 + }, + { + "text": "le", + "start": 146.74, + "end": 147.0, + "confidence": 0.996 + }, + { + "text": "lâcher", + "start": 147.0, + "end": 147.36, + "confidence": 0.967 + }, + { + "text": "allait", + "start": 147.36, + "end": 147.58, + "confidence": 0.991 + }, + { + "text": "entraîner", + "start": 147.58, + "end": 147.88, + "confidence": 0.982 + }, + { + "text": "sa", + "start": 147.88, + "end": 148.02, + "confidence": 0.999 + }, + { + "text": "mort", + "start": 148.02, + "end": 148.3, + "confidence": 0.998 + }, + { + "text": "immédiate.", + "start": 148.3, + "end": 148.8, + "confidence": 0.998 + } + ] + }, + { + "id": 32, + "seek": 12334, + "start": 148.94, + "end": 152.02, + "text": " Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi.", + "tokens": [ + 51639, + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 2851, + 1398, + 11, + 2420, + 269, + 6, + 377, + 24724, + 1323, + 712, + 2016, + 4666, + 6212, + 13, + 51839 + ], + "temperature": 0.0, + "avg_logprob": -0.08259125505940298, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.011154274456202984, + "confidence": 0.973, + "words": [ + { + "text": "Bon,", + "start": 148.94, + "end": 149.24, + "confidence": 0.804 + }, + { + "text": "je", + "start": 149.26, + "end": 149.34, + "confidence": 0.933 + }, + { + "text": "dis", + "start": 149.34, + "end": 149.46, + "confidence": 0.982 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.992 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.78, + "confidence": 0.997 + }, + { + "text": "le", + "start": 149.78, + "end": 149.9, + "confidence": 0.994 + }, + { + "text": "môme,", + "start": 149.9, + "end": 150.1, + "confidence": 0.999 + }, + { + "text": "mais", + "start": 150.32, + "end": 150.5, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 150.5, + "end": 150.84, + "confidence": 0.979 + }, + { + "text": "évidemment", + "start": 150.84, + "end": 151.18, + "confidence": 0.979 
+ }, + { + "text": "valable", + "start": 151.18, + "end": 151.48, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 151.48, + "end": 151.64, + "confidence": 0.996 + }, + { + "text": "nous", + "start": 151.64, + "end": 151.78, + "confidence": 0.998 + }, + { + "text": "aussi.", + "start": 151.78, + "end": 152.02, + "confidence": 0.997 + } + ] + }, + { + "id": 33, + "seek": 15284, + "start": 153.14, + "end": 154.68, + "text": " Donc, rapport inédit, d'accord.", + "tokens": [ + 50389, + 7477, + 11, + 18018, + 294, + 7811, + 270, + 11, + 274, + 6, + 19947, + 13, + 50489 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.814, + "words": [ + { + "text": "Donc,", + "start": 153.14, + "end": 153.24, + "confidence": 0.233 + }, + { + "text": "rapport", + "start": 153.26, + "end": 153.62, + "confidence": 0.854 + }, + { + "text": "inédit,", + "start": 153.62, + "end": 154.18, + "confidence": 0.992 + }, + { + "text": "d'accord.", + "start": 154.28, + "end": 154.68, + "confidence": 0.997 + } + ] + }, + { + "id": 34, + "seek": 15284, + "start": 155.64, + "end": 158.52, + "text": " Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais ?", + "tokens": [ + 50489, + 6313, + 19934, + 257, + 12, + 83, + 12, + 266, + 287, + 6, + 36107, + 421, + 6, + 266, + 297, + 6, + 268, + 1333, + 4271, + 14540, + 2506, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.958, + "words": [ + { + "text": "Mais", + "start": 155.64, + "end": 155.88, + "confidence": 0.991 + }, + { + "text": "pourquoi", + "start": 155.88, + "end": 156.36, + "confidence": 0.992 + }, + { + "text": "a-t-on", + "start": 156.36, + "end": 156.7, + "confidence": 0.95 + }, + { + "text": "l'impression", + "start": 156.7, + "end": 157.06, + "confidence": 0.998 + }, + { + "text": 
"qu'on", + "start": 157.06, + "end": 157.28, + "confidence": 0.99 + }, + { + "text": "n'en", + "start": 157.28, + "end": 157.5, + "confidence": 0.857 + }, + { + "text": "sortira", + "start": 157.5, + "end": 157.86, + "confidence": 0.974 + }, + { + "text": "jamais ?", + "start": 157.86, + "end": 158.52, + "confidence": 0.997 + } + ] + }, + { + "id": 35, + "seek": 15284, + "start": 159.12, + "end": 165.38, + "text": " Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux ?", + "tokens": [ + 50639, + 4410, + 12, + 384, + 421, + 6, + 388, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 8603, + 484, + 388, + 3551, + 303, + 3409, + 2449, + 1030, + 1026, + 14923, + 1925, + 11, + 1030, + 1026, + 14923, + 1925, + 6992, + 631, + 3551, + 303, + 3409, + 2449, + 2506, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.959, + "words": [ + { + "text": "Est-ce", + "start": 159.12, + "end": 159.34, + "confidence": 0.987 + }, + { + "text": "qu'il", + "start": 159.34, + "end": 159.46, + "confidence": 0.993 + }, + { + "text": "faut", + "start": 159.46, + "end": 159.64, + "confidence": 0.998 + }, + { + "text": "en", + "start": 159.64, + "end": 159.88, + "confidence": 0.961 + }, + { + "text": "remettre", + "start": 159.88, + "end": 160.14, + "confidence": 0.999 + }, + { + "text": "la", + "start": 160.14, + "end": 160.46, + "confidence": 0.995 + }, + { + "text": "faute", + "start": 160.46, + "end": 160.64, + "confidence": 0.986 + }, + { + "text": "sur", + "start": 160.64, + "end": 161.06, + "confidence": 0.982 + }, + { + "text": "les", + "start": 161.06, + "end": 161.3, + "confidence": 0.991 + }, + { + "text": "gens", + "start": 161.3, + "end": 161.46, + "confidence": 0.998 + }, + { + "text": "qui", + "start": 161.46, 
+ "end": 161.56, + "confidence": 0.724 + }, + { + "text": "ont", + "start": 161.56, + "end": 161.82, + "confidence": 0.996 + }, + { + "text": "créé", + "start": 161.82, + "end": 162.28, + "confidence": 0.985 + }, + { + "text": "cet", + "start": 162.28, + "end": 162.48, + "confidence": 0.817 + }, + { + "text": "outil", + "start": 162.48, + "end": 162.78, + "confidence": 0.993 + }, + { + "text": "merveilleux", + "start": 162.78, + "end": 163.36, + "confidence": 0.981 + }, + { + "text": "et", + "start": 163.36, + "end": 163.5, + "confidence": 0.983 + }, + { + "text": "diabolique,", + "start": 163.5, + "end": 163.84, + "confidence": 0.991 + }, + { + "text": "et", + "start": 163.86, + "end": 164.0, + "confidence": 0.554 + }, + { + "text": "diabolique", + "start": 164.0, + "end": 164.4, + "confidence": 0.95 + }, + { + "text": "parce", + "start": 164.4, + "end": 164.66, + "confidence": 0.927 + }, + { + "text": "que", + "start": 164.66, + "end": 164.86, + "confidence": 0.978 + }, + { + "text": "merveilleux ?", + "start": 164.86, + "end": 165.38, + "confidence": 0.997 + } + ] + }, + { + "id": 36, + "seek": 15284, + "start": 166.84, + "end": 168.82, + "text": " Les économistes parlent de dépendance du sentier.", + "tokens": [ + 51039, + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 2279, + 811, + 13, + 51139 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.986, + "words": [ + { + "text": "Les", + "start": 166.84, + "end": 167.08, + "confidence": 0.945 + }, + { + "text": "économistes", + "start": 167.08, + "end": 167.46, + "confidence": 0.997 + }, + { + "text": "parlent", + "start": 167.46, + "end": 167.72, + "confidence": 0.993 + }, + { + "text": "de", + "start": 167.72, + "end": 167.88, + "confidence": 0.992 + }, + { + "text": "dépendance", + "start": 167.88, + "end": 168.34, + "confidence": 0.986 + }, + { + "text": "du", + 
"start": 168.34, + "end": 168.52, + "confidence": 0.996 + }, + { + "text": "sentier.", + "start": 168.52, + "end": 168.82, + "confidence": 0.983 + } + ] + }, + { + "id": 37, + "seek": 15284, + "start": 168.98, + "end": 177.38, + "text": " C'est l'idée qu'on met sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "tokens": [ + 51139, + 383, + 6, + 377, + 287, + 6, + 34281, + 421, + 6, + 266, + 1131, + 1022, + 517, + 2279, + 811, + 1956, + 257, + 8862, + 4823, + 455, + 2081, + 11, + 12703, + 40005, + 9020, + 518, + 465, + 8368, + 394, + 30677, + 11, + 12703, + 465, + 40763, + 29492, + 730, + 4232, + 279, + 11, + 465, + 40763, + 29492, + 2251, + 6358, + 42379, + 13, + 51589 + ], + "temperature": 0.0, + "avg_logprob": -0.12395508188596914, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031276822090149, + "confidence": 0.907, + "words": [ + { + "text": "C'est", + "start": 168.98, + "end": 169.2, + "confidence": 0.996 + }, + { + "text": "l'idée", + "start": 169.2, + "end": 169.38, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 169.38, + "end": 169.74, + "confidence": 0.699 + }, + { + "text": "met", + "start": 169.74, + "end": 169.88, + "confidence": 0.648 + }, + { + "text": "sur", + "start": 169.88, + "end": 170.1, + "confidence": 0.995 + }, + { + "text": "un", + "start": 170.1, + "end": 170.52, + "confidence": 0.997 + }, + { + "text": "sentier", + "start": 170.52, + "end": 170.84, + "confidence": 0.991 + }, + { + "text": "qui", + "start": 170.84, + "end": 170.9, + "confidence": 0.955 + }, + { + "text": "a", + "start": 170.9, + "end": 171.0, + "confidence": 0.969 + }, + { + "text": "été", + "start": 171.0, + "end": 171.14, + "confidence": 0.994 + }, + { + "text": "établi,", + "start": 171.14, + "end": 171.5, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 171.76, + "end": 172.1, + "confidence": 0.527 + }, + { + "text": "volontairement", 
+ "start": 172.1, + "end": 172.7, + "confidence": 0.987 + }, + { + "text": "en", + "start": 172.7, + "end": 172.86, + "confidence": 0.927 + }, + { + "text": "marchant", + "start": 172.86, + "end": 173.1, + "confidence": 0.997 + }, + { + "text": "dessus,", + "start": 173.1, + "end": 173.48, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 173.86, + "end": 174.28, + "confidence": 0.748 + }, + { + "text": "en", + "start": 174.28, + "end": 175.16, + "confidence": 0.967 + }, + { + "text": "définissant", + "start": 175.16, + "end": 175.54, + "confidence": 0.981 + }, + { + "text": "des", + "start": 175.54, + "end": 175.74, + "confidence": 0.99 + }, + { + "text": "bornes,", + "start": 175.74, + "end": 175.98, + "confidence": 0.977 + }, + { + "text": "en", + "start": 176.04, + "end": 176.14, + "confidence": 0.674 + }, + { + "text": "définissant", + "start": 176.14, + "end": 176.66, + "confidence": 0.996 + }, + { + "text": "une", + "start": 176.66, + "end": 176.94, + "confidence": 0.983 + }, + { + "text": "signalétique.", + "start": 176.94, + "end": 177.38, + "confidence": 0.686 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/naive.cpu/accurate_apollo11.mp3.words.json b/tests/expected/naive.cpu/accurate_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..960a425ed608e1568f8c2f523148e8ea33ebb33c --- /dev/null +++ b/tests/expected/naive.cpu/accurate_apollo11.mp3.words.json @@ -0,0 +1,2375 @@ +{ + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A. All right. Okay. Yeah, sir. Yeah, sir. Let's take that camera. Let's say it makes it want to go on the helmet we were going to have in B-1. The other one. And you can put the other one on the mic helmet. We'll show it to you in a quick screen. Over. All right. Yeah, sir. All right. Yeah, sir. There's a better helmet than B-1. That's the other one. Nice. Let's go in there. At least we're safe. 
We've got them in there. Helmet bags. And, uh, I guess we have helmets in the helmet bag. At least it's in the helmet bag. Right here. Right here. Yeah, we're taking it next day out of the field up. Hey, we were... You want to hack me on this? Hey, we were... You want to hack me on this? With a cover, I tried it already. Okay, fine. We weren't sure of that. Just a suggestion. We thought we'd... You could check it out. It's not much of an order to turn that. So, uh, I guess we're going to come up with this. Let us know. Okay. No problem. Okay. No problem. We'll let you know when the end of... None. Okay.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.78, + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 11, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 44, + 12, + 36, + 12, + 38, + 12, + 36, + 12, + 32, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.578, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.426 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.842 + }, + { + "text": "Houston,", + "start": 1.54, + "end": 1.96, + "confidence": 0.666 + }, + { + "text": "we", + "start": 1.96, + "end": 2.0, + "confidence": 0.983 + }, + { + "text": "got", + "start": 2.0, + "end": 2.16, + "confidence": 0.742 + }, + { + "text": "a", + "start": 2.16, + "end": 2.38, + "confidence": 0.993 + }, + { + "text": "recommendation", + "start": 2.38, + "end": 3.1, + "confidence": 0.977 + }, + { + "text": "for", + "start": 3.1, + "end": 3.54, + "confidence": 0.94 + }, + { + "text": "you", + "start": 3.54, + "end": 3.88, + "confidence": 0.993 + }, + { + "text": "on", + "start": 3.88, + "end": 4.26, + "confidence": 0.918 + }, 
+ { + "text": "your", + "start": 4.26, + "end": 4.3, + "confidence": 0.976 + }, + { + "text": "Soyuz-EA", + "start": 4.3, + "end": 5.38, + "confidence": 0.453 + }, + { + "text": "GLEM-E-G-E-A.", + "start": 5.38, + "end": 6.78, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 7.78, + "end": 10.46, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.158, + "words": [ + { + "text": "All", + "start": 7.78, + "end": 7.82, + "confidence": 0.007 + }, + { + "text": "right.", + "start": 7.82, + "end": 10.46, + "confidence": 0.727 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 10.82, + "end": 11.38, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.105, + "words": [ + { + "text": "Okay.", + "start": 10.82, + "end": 11.38, + "confidence": 0.105 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 12.02, + "end": 13.44, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.101, + "words": [ + { + "text": "Yeah,", + "start": 12.02, + "end": 12.76, + "confidence": 0.076 + }, + { + "text": "sir.", + "start": 12.76, + "end": 13.44, + "confidence": 0.135 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.58, + "end": 14.02, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.052, + "words": [ + { + "text": "Yeah,", + "start": 13.58, + "end": 13.84, + "confidence": 
0.108 + }, + { + "text": "sir.", + "start": 13.84, + "end": 14.02, + "confidence": 0.025 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 14.04, + "end": 14.6, + "text": " Let's take that camera.", + "tokens": [ + 961, + 311, + 747, + 300, + 2799, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.082, + "words": [ + { + "text": "Let's", + "start": 14.04, + "end": 14.26, + "confidence": 0.043 + }, + { + "text": "take", + "start": 14.26, + "end": 14.3, + "confidence": 0.04 + }, + { + "text": "that", + "start": 14.3, + "end": 14.36, + "confidence": 0.055 + }, + { + "text": "camera.", + "start": 14.36, + "end": 14.6, + "confidence": 0.269 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 14.62, + "end": 19.16, + "text": " Let's say it makes it want to go on the helmet we were going to have in B-1.", + "tokens": [ + 961, + 311, + 584, + 309, + 1669, + 309, + 528, + 281, + 352, + 322, + 264, + 15922, + 321, + 645, + 516, + 281, + 362, + 294, + 363, + 12, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.311, + "words": [ + { + "text": "Let's", + "start": 14.62, + "end": 15.34, + "confidence": 0.181 + }, + { + "text": "say", + "start": 15.34, + "end": 15.56, + "confidence": 0.451 + }, + { + "text": "it", + "start": 15.56, + "end": 15.76, + "confidence": 0.043 + }, + { + "text": "makes", + "start": 15.76, + "end": 15.8, + "confidence": 0.62 + }, + { + "text": "it", + "start": 15.8, + "end": 16.04, + "confidence": 0.943 + }, + { + "text": "want", + "start": 16.04, + "end": 16.22, + "confidence": 0.118 + }, + { + "text": "to", + "start": 16.22, + "end": 16.34, + "confidence": 0.953 + }, + { + "text": "go", + "start": 16.34, + "end": 16.46, + "confidence": 0.775 + }, + { + "text": "on", + "start": 16.46, + "end": 16.74, + 
"confidence": 0.939 + }, + { + "text": "the", + "start": 16.74, + "end": 16.88, + "confidence": 0.532 + }, + { + "text": "helmet", + "start": 16.88, + "end": 17.36, + "confidence": 0.003 + }, + { + "text": "we", + "start": 17.36, + "end": 17.64, + "confidence": 0.018 + }, + { + "text": "were", + "start": 17.64, + "end": 17.86, + "confidence": 0.255 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.654 + }, + { + "text": "to", + "start": 18.06, + "end": 18.26, + "confidence": 0.966 + }, + { + "text": "have", + "start": 18.26, + "end": 18.3, + "confidence": 0.967 + }, + { + "text": "in", + "start": 18.3, + "end": 18.46, + "confidence": 0.899 + }, + { + "text": "B-1.", + "start": 18.46, + "end": 19.16, + "confidence": 0.489 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.4, + "end": 19.94, + "text": " The other one.", + "tokens": [ + 440, + 661, + 472, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.127, + "words": [ + { + "text": "The", + "start": 19.4, + "end": 19.44, + "confidence": 0.004 + }, + { + "text": "other", + "start": 19.44, + "end": 19.7, + "confidence": 0.144 + }, + { + "text": "one.", + "start": 19.7, + "end": 19.94, + "confidence": 0.656 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 20.12, + "end": 22.92, + "text": " And you can put the other one on the mic helmet.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.394, + "words": [ + { + "text": "And", + "start": 20.12, + "end": 20.26, + "confidence": 0.358 + }, + { + "text": "you", + "start": 20.26, + "end": 20.38, + "confidence": 0.915 + }, + { + "text": "can", + "start": 20.38, + "end": 20.54, + "confidence": 0.122 + }, + 
{ + "text": "put", + "start": 20.54, + "end": 20.7, + "confidence": 0.92 + }, + { + "text": "the", + "start": 20.7, + "end": 20.88, + "confidence": 0.981 + }, + { + "text": "other", + "start": 20.88, + "end": 21.06, + "confidence": 0.997 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.97 + }, + { + "text": "on", + "start": 21.26, + "end": 22.14, + "confidence": 0.971 + }, + { + "text": "the", + "start": 22.14, + "end": 22.18, + "confidence": 0.085 + }, + { + "text": "mic", + "start": 22.18, + "end": 22.62, + "confidence": 0.679 + }, + { + "text": "helmet.", + "start": 22.62, + "end": 22.92, + "confidence": 0.085 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 22.94, + "end": 24.58, + "text": " We'll show it to you in a quick screen.", + "tokens": [ + 492, + 603, + 855, + 309, + 281, + 291, + 294, + 257, + 1702, + 2568, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.124, + "words": [ + { + "text": "We'll", + "start": 22.94, + "end": 23.32, + "confidence": 0.098 + }, + { + "text": "show", + "start": 23.32, + "end": 23.36, + "confidence": 0.008 + }, + { + "text": "it", + "start": 23.36, + "end": 23.54, + "confidence": 0.167 + }, + { + "text": "to", + "start": 23.54, + "end": 23.64, + "confidence": 0.268 + }, + { + "text": "you", + "start": 23.64, + "end": 23.8, + "confidence": 0.656 + }, + { + "text": "in", + "start": 23.8, + "end": 23.98, + "confidence": 0.246 + }, + { + "text": "a", + "start": 23.98, + "end": 24.02, + "confidence": 0.683 + }, + { + "text": "quick", + "start": 24.02, + "end": 24.14, + "confidence": 0.022 + }, + { + "text": "screen.", + "start": 24.14, + "end": 24.58, + "confidence": 0.114 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 24.94, + "end": 25.18, + "text": " Over.", + "tokens": [ + 4886, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + 
"compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.117, + "words": [ + { + "text": "Over.", + "start": 24.94, + "end": 25.18, + "confidence": 0.117 + } + ] + }, + { + "id": 11, + "seek": 2600, + "start": 25.52, + "end": 27.48, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.094, + "words": [ + { + "text": "All", + "start": 25.52, + "end": 25.56, + "confidence": 0.002 + }, + { + "text": "right.", + "start": 25.56, + "end": 27.48, + "confidence": 0.622 + } + ] + }, + { + "id": 12, + "seek": 2600, + "start": 31.32, + "end": 32.12, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.045, + "words": [ + { + "text": "Yeah,", + "start": 31.32, + "end": 31.6, + "confidence": 0.032 + }, + { + "text": "sir.", + "start": 31.6, + "end": 32.12, + "confidence": 0.063 + } + ] + }, + { + "id": 13, + "seek": 2600, + "start": 32.66, + "end": 33.06, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.199, + "words": [ + { + "text": "All", + "start": 32.66, + "end": 32.94, + "confidence": 0.025 + }, + { + "text": "right.", + "start": 32.94, + "end": 33.06, + "confidence": 0.56 + } + ] + }, + { + "id": 14, + "seek": 2600, + "start": 33.08, + "end": 33.56, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.036, + "words": [ + { + 
"text": "Yeah,", + "start": 33.08, + "end": 33.38, + "confidence": 0.145 + }, + { + "text": "sir.", + "start": 33.38, + "end": 33.56, + "confidence": 0.009 + } + ] + }, + { + "id": 15, + "seek": 2600, + "start": 33.58, + "end": 35.12, + "text": " There's a better helmet than B-1.", + "tokens": [ + 821, + 311, + 257, + 1101, + 15922, + 813, + 363, + 12, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.046, + "words": [ + { + "text": "There's", + "start": 33.58, + "end": 33.96, + "confidence": 0.035 + }, + { + "text": "a", + "start": 33.96, + "end": 34.0, + "confidence": 0.302 + }, + { + "text": "better", + "start": 34.0, + "end": 34.04, + "confidence": 0.037 + }, + { + "text": "helmet", + "start": 34.04, + "end": 34.36, + "confidence": 0.009 + }, + { + "text": "than", + "start": 34.36, + "end": 34.7, + "confidence": 0.386 + }, + { + "text": "B-1.", + "start": 34.7, + "end": 35.12, + "confidence": 0.031 + } + ] + }, + { + "id": 16, + "seek": 2600, + "start": 35.14, + "end": 35.96, + "text": " That's the other one.", + "tokens": [ + 663, + 311, + 264, + 661, + 472, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.259, + "words": [ + { + "text": "That's", + "start": 35.14, + "end": 35.48, + "confidence": 0.114 + }, + { + "text": "the", + "start": 35.48, + "end": 35.52, + "confidence": 0.609 + }, + { + "text": "other", + "start": 35.52, + "end": 35.7, + "confidence": 0.954 + }, + { + "text": "one.", + "start": 35.7, + "end": 35.96, + "confidence": 0.2 + } + ] + }, + { + "id": 17, + "seek": 2600, + "start": 35.98, + "end": 36.24, + "text": " Nice.", + "tokens": [ + 5490, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 
0.0011296384036540985, + "confidence": 0.008, + "words": [ + { + "text": "Nice.", + "start": 35.98, + "end": 36.24, + "confidence": 0.008 + } + ] + }, + { + "id": 18, + "seek": 2600, + "start": 37.5, + "end": 38.44, + "text": " Let's go in there.", + "tokens": [ + 961, + 311, + 352, + 294, + 456, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.127, + "words": [ + { + "text": "Let's", + "start": 37.5, + "end": 38.16, + "confidence": 0.024 + }, + { + "text": "go", + "start": 38.16, + "end": 38.2, + "confidence": 0.624 + }, + { + "text": "in", + "start": 38.2, + "end": 38.3, + "confidence": 0.037 + }, + { + "text": "there.", + "start": 38.3, + "end": 38.44, + "confidence": 0.571 + } + ] + }, + { + "id": 19, + "seek": 2600, + "start": 38.6, + "end": 39.26, + "text": " At least we're safe.", + "tokens": [ + 1711, + 1935, + 321, + 434, + 3273, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.14, + "words": [ + { + "text": "At", + "start": 38.6, + "end": 38.64, + "confidence": 0.002 + }, + { + "text": "least", + "start": 38.64, + "end": 38.84, + "confidence": 0.269 + }, + { + "text": "we're", + "start": 38.84, + "end": 39.18, + "confidence": 0.138 + }, + { + "text": "safe.", + "start": 39.18, + "end": 39.26, + "confidence": 0.802 + } + ] + }, + { + "id": 20, + "seek": 2600, + "start": 39.92, + "end": 40.52, + "text": " We've got them in there.", + "tokens": [ + 492, + 600, + 658, + 552, + 294, + 456, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.071, + "words": [ + { + "text": "We've", + "start": 39.92, + "end": 40.36, + "confidence": 0.042 + }, + { + "text": "got", + "start": 40.36, + "end": 40.4, + 
"confidence": 0.682 + }, + { + "text": "them", + "start": 40.4, + "end": 40.44, + "confidence": 0.004 + }, + { + "text": "in", + "start": 40.44, + "end": 40.48, + "confidence": 0.041 + }, + { + "text": "there.", + "start": 40.48, + "end": 40.52, + "confidence": 0.216 + } + ] + }, + { + "id": 21, + "seek": 2600, + "start": 40.52, + "end": 41.1, + "text": " Helmet bags.", + "tokens": [ + 6128, + 5537, + 10405, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.002, + "words": [ + { + "text": "Helmet", + "start": 40.52, + "end": 40.8, + "confidence": 0.002 + }, + { + "text": "bags.", + "start": 40.8, + "end": 41.1, + "confidence": 0.002 + } + ] + }, + { + "id": 22, + "seek": 2600, + "start": 41.12, + "end": 43.1, + "text": " And, uh, I guess we have helmets in the helmet bag.", + "tokens": [ + 400, + 11, + 2232, + 11, + 286, + 2041, + 321, + 362, + 42022, + 294, + 264, + 15922, + 3411, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.036, + "words": [ + { + "text": "And,", + "start": 41.12, + "end": 41.5, + "confidence": 0.02 + }, + { + "text": "uh,", + "start": 41.5, + "end": 41.54, + "confidence": 0.55 + }, + { + "text": "I", + "start": 41.54, + "end": 41.58, + "confidence": 0.409 + }, + { + "text": "guess", + "start": 41.58, + "end": 41.64, + "confidence": 0.031 + }, + { + "text": "we", + "start": 41.64, + "end": 41.68, + "confidence": 0.059 + }, + { + "text": "have", + "start": 41.68, + "end": 41.72, + "confidence": 0.018 + }, + { + "text": "helmets", + "start": 41.72, + "end": 41.76, + "confidence": 0.0 + }, + { + "text": "in", + "start": 41.76, + "end": 42.08, + "confidence": 0.025 + }, + { + "text": "the", + "start": 42.08, + "end": 42.38, + "confidence": 0.214 + }, + { + "text": "helmet", + "start": 42.38, + "end": 
43.06, + "confidence": 0.001 + }, + { + "text": "bag.", + "start": 43.06, + "end": 43.1, + "confidence": 0.611 + } + ] + }, + { + "id": 23, + "seek": 2600, + "start": 43.56, + "end": 47.42, + "text": " At least it's in the helmet bag.", + "tokens": [ + 1711, + 1935, + 309, + 311, + 294, + 264, + 15922, + 3411, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.051, + "words": [ + { + "text": "At", + "start": 43.56, + "end": 44.06, + "confidence": 0.001 + }, + { + "text": "least", + "start": 44.06, + "end": 44.32, + "confidence": 0.325 + }, + { + "text": "it's", + "start": 44.32, + "end": 44.9, + "confidence": 0.169 + }, + { + "text": "in", + "start": 44.9, + "end": 45.46, + "confidence": 0.03 + }, + { + "text": "the", + "start": 45.46, + "end": 45.5, + "confidence": 0.345 + }, + { + "text": "helmet", + "start": 45.5, + "end": 46.54, + "confidence": 0.004 + }, + { + "text": "bag.", + "start": 46.54, + "end": 47.42, + "confidence": 0.095 + } + ] + }, + { + "id": 24, + "seek": 2600, + "start": 48.22, + "end": 48.44, + "text": " Right here.", + "tokens": [ + 1779, + 510, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.02, + "words": [ + { + "text": "Right", + "start": 48.22, + "end": 48.26, + "confidence": 0.0 + }, + { + "text": "here.", + "start": 48.26, + "end": 48.44, + "confidence": 0.167 + } + ] + }, + { + "id": 25, + "seek": 2600, + "start": 48.46, + "end": 48.76, + "text": " Right here.", + "tokens": [ + 1779, + 510, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.018, + "words": [ + { + "text": "Right", + "start": 48.46, + "end": 48.56, + "confidence": 0.001 + }, + { + "text": "here.", + "start": 
48.56, + "end": 48.76, + "confidence": 0.078 + } + ] + }, + { + "id": 26, + "seek": 2600, + "start": 48.78, + "end": 55.4, + "text": " Yeah, we're taking it next day out of the field up.", + "tokens": [ + 865, + 11, + 321, + 434, + 1940, + 309, + 958, + 786, + 484, + 295, + 264, + 2519, + 493, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.285, + "words": [ + { + "text": "Yeah,", + "start": 48.78, + "end": 52.1, + "confidence": 0.124 + }, + { + "text": "we're", + "start": 52.1, + "end": 53.64, + "confidence": 0.622 + }, + { + "text": "taking", + "start": 53.64, + "end": 53.68, + "confidence": 0.352 + }, + { + "text": "it", + "start": 53.68, + "end": 53.76, + "confidence": 0.114 + }, + { + "text": "next", + "start": 53.76, + "end": 53.98, + "confidence": 0.411 + }, + { + "text": "day", + "start": 53.98, + "end": 54.2, + "confidence": 0.888 + }, + { + "text": "out", + "start": 54.2, + "end": 54.32, + "confidence": 0.322 + }, + { + "text": "of", + "start": 54.32, + "end": 54.56, + "confidence": 0.473 + }, + { + "text": "the", + "start": 54.56, + "end": 54.7, + "confidence": 0.357 + }, + { + "text": "field", + "start": 54.7, + "end": 55.2, + "confidence": 0.051 + }, + { + "text": "up.", + "start": 55.2, + "end": 55.4, + "confidence": 0.31 + } + ] + }, + { + "id": 27, + "seek": 5500, + "start": 55.42, + "end": 56.32, + "text": " Hey, we were...", + "tokens": [ + 1911, + 11, + 321, + 645, + 485 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.009, + "words": [ + { + "text": "Hey,", + "start": 55.42, + "end": 56.24, + "confidence": 0.015 + }, + { + "text": "we", + "start": 56.24, + "end": 56.28, + "confidence": 0.014 + }, + { + "text": "were...", + "start": 56.28, + "end": 56.32, + "confidence": 0.004 + } + ] + }, + { + "id": 28, 
+ "seek": 5500, + "start": 56.5, + "end": 57.52, + "text": " You want to hack me on this?", + "tokens": [ + 509, + 528, + 281, + 10339, + 385, + 322, + 341, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.022, + "words": [ + { + "text": "You", + "start": 56.5, + "end": 56.64, + "confidence": 0.006 + }, + { + "text": "want", + "start": 56.64, + "end": 57.02, + "confidence": 0.007 + }, + { + "text": "to", + "start": 57.02, + "end": 57.36, + "confidence": 0.345 + }, + { + "text": "hack", + "start": 57.36, + "end": 57.4, + "confidence": 0.0 + }, + { + "text": "me", + "start": 57.4, + "end": 57.44, + "confidence": 0.088 + }, + { + "text": "on", + "start": 57.44, + "end": 57.48, + "confidence": 0.004 + }, + { + "text": "this?", + "start": 57.48, + "end": 57.52, + "confidence": 0.177 + } + ] + }, + { + "id": 29, + "seek": 5500, + "start": 57.52, + "end": 58.44, + "text": " Hey, we were...", + "tokens": [ + 1911, + 11, + 321, + 645, + 485 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.048, + "words": [ + { + "text": "Hey,", + "start": 57.52, + "end": 57.98, + "confidence": 0.033 + }, + { + "text": "we", + "start": 57.98, + "end": 58.14, + "confidence": 0.391 + }, + { + "text": "were...", + "start": 58.14, + "end": 58.44, + "confidence": 0.024 + } + ] + }, + { + "id": 30, + "seek": 5500, + "start": 58.46, + "end": 59.44, + "text": " You want to hack me on this?", + "tokens": [ + 509, + 528, + 281, + 10339, + 385, + 322, + 341, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.04, + "words": [ + { + "text": "You", + "start": 58.46, + "end": 58.7, + "confidence": 0.014 + }, + { + "text": "want", + "start": 
58.7, + "end": 58.74, + "confidence": 0.017 + }, + { + "text": "to", + "start": 58.74, + "end": 58.78, + "confidence": 0.336 + }, + { + "text": "hack", + "start": 58.78, + "end": 58.82, + "confidence": 0.0 + }, + { + "text": "me", + "start": 58.82, + "end": 59.0, + "confidence": 0.417 + }, + { + "text": "on", + "start": 59.0, + "end": 59.2, + "confidence": 0.277 + }, + { + "text": "this?", + "start": 59.2, + "end": 59.44, + "confidence": 0.138 + } + ] + }, + { + "id": 31, + "seek": 5500, + "start": 59.76, + "end": 61.48, + "text": " With a cover, I tried it already.", + "tokens": [ + 2022, + 257, + 2060, + 11, + 286, + 3031, + 309, + 1217, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.064, + "words": [ + { + "text": "With", + "start": 59.76, + "end": 60.12, + "confidence": 0.027 + }, + { + "text": "a", + "start": 60.12, + "end": 60.84, + "confidence": 0.553 + }, + { + "text": "cover,", + "start": 60.84, + "end": 61.32, + "confidence": 0.167 + }, + { + "text": "I", + "start": 61.32, + "end": 61.36, + "confidence": 0.846 + }, + { + "text": "tried", + "start": 61.36, + "end": 61.4, + "confidence": 0.046 + }, + { + "text": "it", + "start": 61.4, + "end": 61.44, + "confidence": 0.011 + }, + { + "text": "already.", + "start": 61.44, + "end": 61.48, + "confidence": 0.01 + } + ] + }, + { + "id": 32, + "seek": 5500, + "start": 62.38, + "end": 62.46, + "text": " Okay, fine.", + "tokens": [ + 1033, + 11, + 2489, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.062, + "words": [ + { + "text": "Okay,", + "start": 62.38, + "end": 62.42, + "confidence": 0.06 + }, + { + "text": "fine.", + "start": 62.42, + "end": 62.46, + "confidence": 0.065 + } + ] + }, + { + "id": 33, + "seek": 5500, + "start": 62.56, + "end": 63.44, + "text": " 
We weren't sure of that.", + "tokens": [ + 492, + 4999, + 380, + 988, + 295, + 300, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.043, + "words": [ + { + "text": "We", + "start": 62.56, + "end": 63.06, + "confidence": 0.003 + }, + { + "text": "weren't", + "start": 63.06, + "end": 63.1, + "confidence": 0.155 + }, + { + "text": "sure", + "start": 63.1, + "end": 63.14, + "confidence": 0.01 + }, + { + "text": "of", + "start": 63.14, + "end": 63.18, + "confidence": 0.018 + }, + { + "text": "that.", + "start": 63.18, + "end": 63.44, + "confidence": 0.144 + } + ] + }, + { + "id": 34, + "seek": 5500, + "start": 63.46, + "end": 63.78, + "text": " Just a suggestion.", + "tokens": [ + 1449, + 257, + 16541, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.004, + "words": [ + { + "text": "Just", + "start": 63.46, + "end": 63.5, + "confidence": 0.003 + }, + { + "text": "a", + "start": 63.5, + "end": 63.74, + "confidence": 0.015 + }, + { + "text": "suggestion.", + "start": 63.74, + "end": 63.78, + "confidence": 0.002 + } + ] + }, + { + "id": 35, + "seek": 5500, + "start": 63.78, + "end": 65.26, + "text": " We thought we'd...", + "tokens": [ + 492, + 1194, + 321, + 1116, + 485 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.011, + "words": [ + { + "text": "We", + "start": 63.78, + "end": 64.9, + "confidence": 0.003 + }, + { + "text": "thought", + "start": 64.9, + "end": 65.2, + "confidence": 0.001 + }, + { + "text": "we'd...", + "start": 65.2, + "end": 65.26, + "confidence": 0.036 + } + ] + }, + { + "id": 36, + "seek": 5500, + "start": 66.26, + "end": 66.46, + "text": " You could check it out.", + "tokens": [ + 
509, + 727, + 1520, + 309, + 484, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.03, + "words": [ + { + "text": "You", + "start": 66.26, + "end": 66.3, + "confidence": 0.004 + }, + { + "text": "could", + "start": 66.3, + "end": 66.34, + "confidence": 0.002 + }, + { + "text": "check", + "start": 66.34, + "end": 66.38, + "confidence": 0.003 + }, + { + "text": "it", + "start": 66.38, + "end": 66.42, + "confidence": 0.16 + }, + { + "text": "out.", + "start": 66.42, + "end": 66.46, + "confidence": 0.442 + } + ] + }, + { + "id": 37, + "seek": 5500, + "start": 66.82, + "end": 69.34, + "text": " It's not much of an order to turn that.", + "tokens": [ + 467, + 311, + 406, + 709, + 295, + 364, + 1668, + 281, + 1261, + 300, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.136, + "words": [ + { + "text": "It's", + "start": 66.82, + "end": 67.32, + "confidence": 0.035 + }, + { + "text": "not", + "start": 67.32, + "end": 68.36, + "confidence": 0.025 + }, + { + "text": "much", + "start": 68.36, + "end": 68.62, + "confidence": 0.683 + }, + { + "text": "of", + "start": 68.62, + "end": 68.8, + "confidence": 0.59 + }, + { + "text": "an", + "start": 68.8, + "end": 68.86, + "confidence": 0.137 + }, + { + "text": "order", + "start": 68.86, + "end": 68.96, + "confidence": 0.142 + }, + { + "text": "to", + "start": 68.96, + "end": 69.16, + "confidence": 0.043 + }, + { + "text": "turn", + "start": 69.16, + "end": 69.2, + "confidence": 0.1 + }, + { + "text": "that.", + "start": 69.2, + "end": 69.34, + "confidence": 0.536 + } + ] + }, + { + "id": 38, + "seek": 5500, + "start": 70.32, + "end": 72.02, + "text": " So, uh, I guess we're going to come up with this.", + "tokens": [ + 407, + 11, + 2232, + 11, + 286, + 2041, + 321, + 434, + 516, + 
281, + 808, + 493, + 365, + 341, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.495, + "words": [ + { + "text": "So,", + "start": 70.32, + "end": 70.36, + "confidence": 0.528 + }, + { + "text": "uh,", + "start": 70.36, + "end": 70.5, + "confidence": 0.497 + }, + { + "text": "I", + "start": 70.5, + "end": 70.54, + "confidence": 0.958 + }, + { + "text": "guess", + "start": 70.54, + "end": 70.84, + "confidence": 0.986 + }, + { + "text": "we're", + "start": 70.84, + "end": 71.12, + "confidence": 0.504 + }, + { + "text": "going", + "start": 71.12, + "end": 71.3, + "confidence": 0.143 + }, + { + "text": "to", + "start": 71.3, + "end": 71.48, + "confidence": 0.956 + }, + { + "text": "come", + "start": 71.48, + "end": 71.52, + "confidence": 0.547 + }, + { + "text": "up", + "start": 71.52, + "end": 71.7, + "confidence": 0.796 + }, + { + "text": "with", + "start": 71.7, + "end": 71.88, + "confidence": 0.807 + }, + { + "text": "this.", + "start": 71.88, + "end": 72.02, + "confidence": 0.181 + } + ] + }, + { + "id": 39, + "seek": 5500, + "start": 72.04, + "end": 72.52, + "text": " Let us know.", + "tokens": [ + 961, + 505, + 458, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.323, + "words": [ + { + "text": "Let", + "start": 72.04, + "end": 72.14, + "confidence": 0.076 + }, + { + "text": "us", + "start": 72.14, + "end": 72.34, + "confidence": 0.304 + }, + { + "text": "know.", + "start": 72.34, + "end": 72.52, + "confidence": 0.686 + } + ] + }, + { + "id": 40, + "seek": 5500, + "start": 72.54, + "end": 72.7, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.026, 
+ "words": [ + { + "text": "Okay.", + "start": 72.54, + "end": 72.7, + "confidence": 0.026 + } + ] + }, + { + "id": 41, + "seek": 5500, + "start": 74.2, + "end": 75.16, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.082, + "words": [ + { + "text": "No", + "start": 74.2, + "end": 74.42, + "confidence": 0.004 + }, + { + "text": "problem.", + "start": 74.42, + "end": 75.16, + "confidence": 0.351 + } + ] + }, + { + "id": 42, + "seek": 5500, + "start": 75.18, + "end": 75.9, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.237, + "words": [ + { + "text": "Okay.", + "start": 75.18, + "end": 75.9, + "confidence": 0.237 + } + ] + }, + { + "id": 43, + "seek": 5500, + "start": 75.92, + "end": 76.64, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.56, + "words": [ + { + "text": "No", + "start": 75.92, + "end": 76.22, + "confidence": 0.366 + }, + { + "text": "problem.", + "start": 76.22, + "end": 76.64, + "confidence": 0.692 + } + ] + }, + { + "id": 44, + "seek": 5500, + "start": 76.66, + "end": 78.1, + "text": " We'll let you know when the end of...", + "tokens": [ + 492, + 603, + 718, + 291, + 458, + 562, + 264, + 917, + 295, + 485 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.367, + "words": [ + { + "text": "We'll", + "start": 76.66, + "end": 76.82, + "confidence": 0.091 + }, + { + "text": "let", + "start": 76.82, + "end": 77.04, 
+ "confidence": 0.723 + }, + { + "text": "you", + "start": 77.04, + "end": 77.18, + "confidence": 0.296 + }, + { + "text": "know", + "start": 77.18, + "end": 77.26, + "confidence": 0.984 + }, + { + "text": "when", + "start": 77.26, + "end": 77.38, + "confidence": 0.596 + }, + { + "text": "the", + "start": 77.38, + "end": 77.44, + "confidence": 0.683 + }, + { + "text": "end", + "start": 77.44, + "end": 77.56, + "confidence": 0.937 + }, + { + "text": "of...", + "start": 77.56, + "end": 78.1, + "confidence": 0.257 + } + ] + }, + { + "id": 45, + "seek": 5500, + "start": 78.12, + "end": 78.34, + "text": " None.", + "tokens": [ + 14492, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.017, + "words": [ + { + "text": "None.", + "start": 78.12, + "end": 78.34, + "confidence": 0.017 + } + ] + }, + { + "id": 46, + "seek": 7900, + "start": 78.52, + "end": 82.18, + "text": " Okay.", + "tokens": [ + 50364, + 1033, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.7262506484985352, + "compression_ratio": 0.38461538461538464, + "no_speech_prob": 0.0013847488444298506, + "confidence": 0.036, + "words": [ + { + "text": "Okay.", + "start": 78.52, + "end": 82.18, + "confidence": 0.036 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive.cpu/naive_apollo11.mp3.words.json b/tests/expected/naive.cpu/naive_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..e4eef5136cb849167d2c3d19c38438515c0e4232 --- /dev/null +++ b/tests/expected/naive.cpu/naive_apollo11.mp3.words.json @@ -0,0 +1,3814 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. 
Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.68, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.49, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.426 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.842 + }, + { + "text": "Houston", + "start": 1.54, + "end": 1.8, + "confidence": 0.974 + }, + { + "text": "we", + "start": 1.8, + "end": 1.98, + "confidence": 0.453 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.789 + }, + { + "text": "a", + "start": 2.16, + "end": 2.36, + "confidence": 0.992 + }, + { + "text": "recommendation", + "start": 2.36, + "end": 3.1, + "confidence": 0.971 + }, + { + "text": "for", + "start": 3.1, + "end": 3.54, + "confidence": 0.944 + }, + { + "text": "you", + "start": 3.54, + "end": 3.88, + "confidence": 0.99 + }, + { + "text": "on", + "start": 3.88, + "end": 4.26, + "confidence": 0.935 + }, + { + "text": "your", + "start": 4.26, + "end": 4.3, + "confidence": 0.974 + }, + { + "text": "Soyuz-VA", + "start": 4.3, + "end": 5.38, + "confidence": 0.325 + }, + { + "text": "GLEME", + "start": 5.38, + "end": 6.02, + "confidence": 0.171 + }, + { + "text": "GVA.", + "start": 6.02, + "end": 6.68, + "confidence": 0.318 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.7, + "end": 19.16, + "text": " Alright, okay, we 
like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.324, + "words": [ + { + "text": "Alright,", + "start": 6.7, + "end": 12.02, + "confidence": 0.083 + }, + { + "text": "okay,", + "start": 12.02, + "end": 12.82, + "confidence": 0.321 + }, + { + "text": "we", + "start": 12.82, + "end": 13.04, + "confidence": 0.365 + }, + { + "text": "like", + "start": 13.04, + "end": 13.5, + "confidence": 0.475 + }, + { + "text": "to", + "start": 13.5, + "end": 13.68, + "confidence": 0.124 + }, + { + "text": "say", + "start": 13.68, + "end": 14.98, + "confidence": 0.127 + }, + { + "text": "that", + "start": 14.98, + "end": 15.56, + "confidence": 0.244 + }, + { + "text": "they", + "start": 15.56, + "end": 15.6, + "confidence": 0.387 + }, + { + "text": "make", + "start": 15.6, + "end": 15.78, + "confidence": 0.155 + }, + { + "text": "the", + "start": 15.78, + "end": 15.92, + "confidence": 0.172 + }, + { + "text": "one", + "start": 15.92, + "end": 16.1, + "confidence": 0.507 + }, + { + "text": "that's", + "start": 16.1, + "end": 16.36, + "confidence": 0.385 + }, + { + "text": "on", + "start": 16.36, + "end": 16.6, + "confidence": 0.324 + }, + { + "text": "the", + "start": 16.6, + "end": 16.84, + "confidence": 0.35 + }, + { + "text": "helmet", + "start": 16.84, + "end": 17.36, + "confidence": 0.218 + }, + { + "text": "we're", + "start": 17.36, + "end": 17.86, + "confidence": 0.361 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.435 + }, + { + "text": "to", + "start": 18.06, + "end": 18.22, + "confidence": 0.496 + }, + { + "text": "have", + "start": 18.22, + 
"end": 18.26, + "confidence": 0.831 + }, + { + "text": "in", + "start": 18.26, + "end": 18.48, + "confidence": 0.761 + }, + { + "text": "B1.", + "start": 18.48, + "end": 19.16, + "confidence": 0.731 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 20.12, + "end": 24.64, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.119, + "words": [ + { + "text": "And", + "start": 20.12, + "end": 20.16, + "confidence": 0.27 + }, + { + "text": "you", + "start": 20.16, + "end": 20.36, + "confidence": 0.884 + }, + { + "text": "can", + "start": 20.36, + "end": 20.54, + "confidence": 0.388 + }, + { + "text": "put", + "start": 20.54, + "end": 20.72, + "confidence": 0.929 + }, + { + "text": "the", + "start": 20.72, + "end": 20.9, + "confidence": 0.98 + }, + { + "text": "other", + "start": 20.9, + "end": 21.06, + "confidence": 0.995 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.964 + }, + { + "text": "on", + "start": 21.26, + "end": 21.8, + "confidence": 0.962 + }, + { + "text": "the", + "start": 21.8, + "end": 21.84, + "confidence": 0.185 + }, + { + "text": "mic", + "start": 21.84, + "end": 22.6, + "confidence": 0.682 + }, + { + "text": "helmet", + "start": 22.6, + "end": 22.96, + "confidence": 0.011 + }, + { + "text": "with", + "start": 22.96, + "end": 23.2, + "confidence": 0.055 + }, + { + "text": "those", + "start": 23.2, + "end": 23.46, + "confidence": 0.084 + }, + { + "text": "GVA", + "start": 23.46, + "end": 23.88, + "confidence": 0.0 + }, + { + "text": "blizzard", + "start": 23.88, + "end": 24.36, + "confidence": 0.026 + }, + { + "text": "frames.", + "start": 24.36, + "end": 24.64, + 
"confidence": 0.179 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 24.86, + "end": 54.62, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.1079042222764757, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.00111382023897022, + "confidence": 0.887, + "words": [ + { + "text": "Alright,", + "start": 24.86, + "end": 32.0, + "confidence": 0.092 + }, + { + "text": "got", + "start": 32.0, + "end": 32.04, + "confidence": 0.107 + }, + { + "text": "them,", + "start": 32.04, + "end": 32.58, + "confidence": 0.194 + }, + { + "text": "got", + "start": 32.58, + "end": 33.06, + "confidence": 0.39 + }, + { + "text": "them,", + "start": 33.06, + "end": 33.78, + "confidence": 0.616 + }, + { + "text": "got", + "start": 33.78, + "end": 33.82, + "confidence": 0.641 + }, + { + "text": "them,", + "start": 33.82, + "end": 34.52, + "confidence": 0.506 + }, + { + "text": "got", + "start": 34.52, + "end": 34.74, + "confidence": 0.662 + }, + { + "text": "them,", + "start": 34.74, + "end": 34.78, + "confidence": 0.507 + }, + { + "text": "got", + "start": 34.78, + "end": 35.02, + "confidence": 0.754 + }, + { + "text": "them,", + "start": 35.02, + "end": 35.06, + "confidence": 0.589 + }, + { + "text": "got", + "start": 35.06, + "end": 35.1, + "confidence": 0.779 + }, + { + "text": "them,", + "start": 35.1, + "end": 35.14, + "confidence": 0.705 + }, + { + "text": "got", + "start": 35.14, + "end": 35.34, + "confidence": 0.86 + }, + { + "text": "them,", + "start": 35.34, + "end": 35.98, + "confidence": 0.82 + }, + { + "text": "got", + "start": 35.98, + "end": 36.02, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 36.02, + "end": 36.06, + "confidence": 0.76 + }, + { + "text": "got", + "start": 
36.06, + "end": 36.1, + "confidence": 0.479 + }, + { + "text": "them,", + "start": 36.1, + "end": 36.14, + "confidence": 0.846 + }, + { + "text": "got", + "start": 36.14, + "end": 36.18, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 36.18, + "end": 36.22, + "confidence": 0.89 + }, + { + "text": "got", + "start": 36.22, + "end": 36.26, + "confidence": 0.928 + }, + { + "text": "them,", + "start": 36.26, + "end": 36.3, + "confidence": 0.903 + }, + { + "text": "got", + "start": 36.3, + "end": 36.34, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 36.34, + "end": 36.38, + "confidence": 0.91 + }, + { + "text": "got", + "start": 36.38, + "end": 36.42, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.42, + "end": 36.46, + "confidence": 0.904 + }, + { + "text": "got", + "start": 36.46, + "end": 36.5, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 36.5, + "end": 36.54, + "confidence": 0.903 + }, + { + "text": "got", + "start": 36.54, + "end": 36.58, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 36.58, + "end": 36.62, + "confidence": 0.912 + }, + { + "text": "got", + "start": 36.62, + "end": 36.66, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 36.66, + "end": 36.7, + "confidence": 0.918 + }, + { + "text": "got", + "start": 36.7, + "end": 36.74, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 36.74, + "end": 36.78, + "confidence": 0.923 + }, + { + "text": "got", + "start": 36.78, + "end": 36.82, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 36.82, + "end": 36.86, + "confidence": 0.93 + }, + { + "text": "got", + "start": 36.86, + "end": 36.9, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 36.9, + "end": 36.94, + "confidence": 0.937 + }, + { + "text": "got", + "start": 36.94, + "end": 36.98, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.98, + "end": 37.02, + "confidence": 0.942 + }, + { + "text": "got", + "start": 37.02, + "end": 37.06, 
+ "confidence": 0.946 + }, + { + "text": "them,", + "start": 37.06, + "end": 37.1, + "confidence": 0.945 + }, + { + "text": "got", + "start": 37.1, + "end": 37.14, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 37.14, + "end": 37.18, + "confidence": 0.948 + }, + { + "text": "got", + "start": 37.18, + "end": 37.22, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 37.22, + "end": 37.26, + "confidence": 0.951 + }, + { + "text": "got", + "start": 37.26, + "end": 37.3, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 37.3, + "end": 37.34, + "confidence": 0.953 + }, + { + "text": "got", + "start": 37.34, + "end": 37.38, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.38, + "end": 37.42, + "confidence": 0.956 + }, + { + "text": "got", + "start": 37.42, + "end": 37.46, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.46, + "end": 37.5, + "confidence": 0.957 + }, + { + "text": "got", + "start": 37.5, + "end": 37.54, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 37.54, + "end": 37.58, + "confidence": 0.958 + }, + { + "text": "got", + "start": 37.58, + "end": 37.62, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 37.62, + "end": 37.66, + "confidence": 0.961 + }, + { + "text": "got", + "start": 37.66, + "end": 37.7, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 37.7, + "end": 37.74, + "confidence": 0.961 + }, + { + "text": "got", + "start": 37.74, + "end": 37.78, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 37.78, + "end": 37.82, + "confidence": 0.962 + }, + { + "text": "got", + "start": 37.82, + "end": 37.86, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 37.86, + "end": 37.9, + "confidence": 0.964 + }, + { + "text": "got", + "start": 37.9, + "end": 37.94, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 37.94, + "end": 37.98, + "confidence": 0.965 + }, + { + "text": "got", + "start": 37.98, + "end": 38.02, + "confidence": 
0.959 + }, + { + "text": "them,", + "start": 38.02, + "end": 38.06, + "confidence": 0.966 + }, + { + "text": "got", + "start": 38.06, + "end": 38.1, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.1, + "end": 38.14, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.14, + "end": 38.18, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.18, + "end": 38.22, + "confidence": 0.969 + }, + { + "text": "got", + "start": 38.22, + "end": 38.26, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.26, + "end": 38.3, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.3, + "end": 38.34, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 38.34, + "end": 38.38, + "confidence": 0.971 + }, + { + "text": "got", + "start": 38.38, + "end": 38.42, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 38.42, + "end": 38.46, + "confidence": 0.972 + }, + { + "text": "got", + "start": 38.46, + "end": 38.5, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 38.5, + "end": 38.54, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.54, + "end": 38.58, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 38.58, + "end": 38.62, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.62, + "end": 38.66, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 38.66, + "end": 38.7, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.7, + "end": 38.74, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 38.74, + "end": 38.78, + "confidence": 0.976 + }, + { + "text": "got", + "start": 38.78, + "end": 38.82, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 38.82, + "end": 38.86, + "confidence": 0.977 + }, + { + "text": "got", + "start": 38.86, + "end": 38.9, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 38.9, + "end": 38.94, + "confidence": 0.978 + }, + { + "text": "got", + "start": 38.94, + "end": 38.98, + "confidence": 0.978 + }, + { + 
"text": "them,", + "start": 38.98, + "end": 39.02, + "confidence": 0.978 + }, + { + "text": "got", + "start": 39.02, + "end": 39.06, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 39.06, + "end": 39.1, + "confidence": 0.979 + }, + { + "text": "got", + "start": 39.1, + "end": 39.14, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 39.14, + "end": 39.18, + "confidence": 0.981 + }, + { + "text": "got", + "start": 39.18, + "end": 39.22, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.22, + "end": 39.26, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.26, + "end": 39.3, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.3, + "end": 39.34, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.34, + "end": 39.38, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.38, + "end": 39.42, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.42, + "end": 39.46, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.46, + "end": 39.5, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.5, + "end": 39.54, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.54, + "end": 39.58, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.58, + "end": 39.62, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.62, + "end": 39.66, + "confidence": 0.984 + }, + { + "text": "got", + "start": 39.66, + "end": 39.7, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.7, + "end": 39.74, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.74, + "end": 39.78, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.78, + "end": 39.82, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.82, + "end": 39.86, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.86, + "end": 39.9, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.9, + "end": 39.94, + "confidence": 0.992 + }, + { + "text": "them,", + 
"start": 39.94, + "end": 39.98, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.98, + "end": 40.02, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.02, + "end": 40.06, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.06, + "end": 40.1, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.1, + "end": 40.14, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.14, + "end": 40.18, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.18, + "end": 40.22, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.22, + "end": 40.26, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.26, + "end": 40.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.3, + "end": 40.34, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.34, + "end": 40.38, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.38, + "end": 40.42, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.42, + "end": 40.46, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.46, + "end": 40.5, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.5, + "end": 40.54, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.54, + "end": 40.58, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.58, + "end": 40.76, + "confidence": 0.988 + }, + { + "text": "got", + "start": 40.76, + "end": 40.8, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.8, + "end": 40.84, + "confidence": 0.988 + }, + { + "text": "got", + "start": 40.84, + "end": 40.88, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.88, + "end": 40.92, + "confidence": 0.989 + }, + { + "text": "got", + "start": 40.92, + "end": 40.96, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.96, + "end": 41.0, + "confidence": 0.989 + }, + { + "text": "got", + "start": 41.0, + "end": 41.04, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.04, + 
"end": 41.08, + "confidence": 0.989 + }, + { + "text": "got", + "start": 41.08, + "end": 41.12, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.12, + "end": 42.32, + "confidence": 0.989 + }, + { + "text": "got", + "start": 42.32, + "end": 42.68, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 42.68, + "end": 42.9, + "confidence": 0.989 + }, + { + "text": "got", + "start": 42.9, + "end": 44.08, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 44.08, + "end": 44.96, + "confidence": 0.989 + }, + { + "text": "got", + "start": 44.96, + "end": 46.9, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.9, + "end": 47.82, + "confidence": 0.989 + }, + { + "text": "got", + "start": 47.82, + "end": 48.56, + "confidence": 0.994 + }, + { + "text": "them", + "start": 48.56, + "end": 54.62, + "confidence": 0.996 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 54.64, + "end": 84.6, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.053046889369263245, + "compression_ratio": 29.52, + "no_speech_prob": 0.24410122632980347, + "confidence": 0.839, + "words": [ + { + "text": "got", + "start": 54.64, + "end": 54.82, + "confidence": 0.004 + }, + { + "text": "them,", + "start": 54.82, + "end": 56.36, + "confidence": 0.007 + }, + { + "text": "got", + "start": 56.36, + "end": 58.46, + "confidence": 0.037 + }, + { + "text": "them,", + "start": 58.46, + "end": 59.7, + "confidence": 0.349 + }, + { + "text": "got", + "start": 59.7, + "end": 60.0, + "confidence": 0.616 + }, + { + "text": "them,", + "start": 60.0, + "end": 60.04, + "confidence": 0.574 + }, + { + "text": "got", + "start": 60.04, + "end": 60.08, + "confidence": 0.808 + }, + { + "text": "them,", + "start": 60.08, + "end": 60.12, + 
"confidence": 0.636 + }, + { + "text": "got", + "start": 60.12, + "end": 60.16, + "confidence": 0.875 + }, + { + "text": "them,", + "start": 60.16, + "end": 60.2, + "confidence": 0.676 + }, + { + "text": "got", + "start": 60.2, + "end": 60.24, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 60.24, + "end": 60.42, + "confidence": 0.689 + }, + { + "text": "got", + "start": 60.42, + "end": 60.46, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 60.46, + "end": 60.5, + "confidence": 0.757 + }, + { + "text": "got", + "start": 60.5, + "end": 60.62, + "confidence": 0.904 + }, + { + "text": "them,", + "start": 60.62, + "end": 61.78, + "confidence": 0.825 + }, + { + "text": "got", + "start": 61.78, + "end": 61.82, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 61.82, + "end": 61.86, + "confidence": 0.58 + }, + { + "text": "got", + "start": 61.86, + "end": 61.9, + "confidence": 0.123 + }, + { + "text": "them,", + "start": 61.9, + "end": 61.94, + "confidence": 0.75 + }, + { + "text": "got", + "start": 61.94, + "end": 61.98, + "confidence": 0.772 + }, + { + "text": "them,", + "start": 61.98, + "end": 62.02, + "confidence": 0.81 + }, + { + "text": "got", + "start": 62.02, + "end": 62.06, + "confidence": 0.808 + }, + { + "text": "them,", + "start": 62.06, + "end": 62.1, + "confidence": 0.828 + }, + { + "text": "got", + "start": 62.1, + "end": 62.14, + "confidence": 0.82 + }, + { + "text": "them,", + "start": 62.14, + "end": 62.18, + "confidence": 0.852 + }, + { + "text": "got", + "start": 62.18, + "end": 62.22, + "confidence": 0.858 + }, + { + "text": "them,", + "start": 62.22, + "end": 62.26, + "confidence": 0.886 + }, + { + "text": "got", + "start": 62.26, + "end": 62.3, + "confidence": 0.899 + }, + { + "text": "them,", + "start": 62.3, + "end": 62.34, + "confidence": 0.901 + }, + { + "text": "got", + "start": 62.34, + "end": 62.38, + "confidence": 0.89 + }, + { + "text": "them,", + "start": 62.38, + "end": 62.42, + "confidence": 0.905 + }, 
+ { + "text": "got", + "start": 62.42, + "end": 62.46, + "confidence": 0.868 + }, + { + "text": "them,", + "start": 62.46, + "end": 62.5, + "confidence": 0.919 + }, + { + "text": "got", + "start": 62.5, + "end": 62.54, + "confidence": 0.881 + }, + { + "text": "them,", + "start": 62.54, + "end": 62.58, + "confidence": 0.929 + }, + { + "text": "got", + "start": 62.58, + "end": 62.62, + "confidence": 0.893 + }, + { + "text": "them,", + "start": 62.62, + "end": 62.66, + "confidence": 0.938 + }, + { + "text": "got", + "start": 62.66, + "end": 62.7, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 62.7, + "end": 62.74, + "confidence": 0.943 + }, + { + "text": "got", + "start": 62.74, + "end": 62.78, + "confidence": 0.913 + }, + { + "text": "them,", + "start": 62.78, + "end": 62.82, + "confidence": 0.949 + }, + { + "text": "got", + "start": 62.82, + "end": 62.86, + "confidence": 0.919 + }, + { + "text": "them,", + "start": 62.86, + "end": 62.9, + "confidence": 0.952 + }, + { + "text": "got", + "start": 62.9, + "end": 62.94, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 62.94, + "end": 62.98, + "confidence": 0.956 + }, + { + "text": "got", + "start": 62.98, + "end": 63.02, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 63.02, + "end": 63.06, + "confidence": 0.958 + }, + { + "text": "got", + "start": 63.06, + "end": 63.1, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 63.1, + "end": 63.14, + "confidence": 0.961 + }, + { + "text": "got", + "start": 63.14, + "end": 63.18, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 63.18, + "end": 63.22, + "confidence": 0.964 + }, + { + "text": "got", + "start": 63.22, + "end": 63.26, + "confidence": 0.939 + }, + { + "text": "them,", + "start": 63.26, + "end": 63.3, + "confidence": 0.964 + }, + { + "text": "got", + "start": 63.3, + "end": 63.34, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 63.34, + "end": 63.38, + "confidence": 0.965 + }, + { + "text": 
"got", + "start": 63.38, + "end": 63.42, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 63.42, + "end": 63.46, + "confidence": 0.967 + }, + { + "text": "got", + "start": 63.46, + "end": 63.5, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 63.5, + "end": 63.54, + "confidence": 0.968 + }, + { + "text": "got", + "start": 63.54, + "end": 63.58, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 63.58, + "end": 63.62, + "confidence": 0.968 + }, + { + "text": "got", + "start": 63.62, + "end": 63.66, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 63.66, + "end": 63.7, + "confidence": 0.97 + }, + { + "text": "got", + "start": 63.7, + "end": 63.74, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 63.74, + "end": 63.78, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.78, + "end": 63.82, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 63.82, + "end": 63.86, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.86, + "end": 63.9, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 63.9, + "end": 63.94, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.94, + "end": 63.98, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 63.98, + "end": 64.02, + "confidence": 0.97 + }, + { + "text": "got", + "start": 64.02, + "end": 64.06, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 64.06, + "end": 64.1, + "confidence": 0.972 + }, + { + "text": "got", + "start": 64.1, + "end": 64.14, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 64.14, + "end": 64.18, + "confidence": 0.972 + }, + { + "text": "got", + "start": 64.18, + "end": 64.22, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 64.22, + "end": 64.26, + "confidence": 0.973 + }, + { + "text": "got", + "start": 64.26, + "end": 64.3, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 64.3, + "end": 64.34, + "confidence": 0.974 + }, + { + "text": "got", + "start": 
64.34, + "end": 64.38, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 64.38, + "end": 64.42, + "confidence": 0.974 + }, + { + "text": "got", + "start": 64.42, + "end": 64.46, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 64.46, + "end": 64.5, + "confidence": 0.974 + }, + { + "text": "got", + "start": 64.5, + "end": 64.54, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 64.54, + "end": 64.58, + "confidence": 0.974 + }, + { + "text": "got", + "start": 64.58, + "end": 64.62, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 64.62, + "end": 64.66, + "confidence": 0.975 + }, + { + "text": "got", + "start": 64.66, + "end": 64.7, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 64.7, + "end": 64.74, + "confidence": 0.975 + }, + { + "text": "got", + "start": 64.74, + "end": 64.78, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 64.78, + "end": 64.82, + "confidence": 0.977 + }, + { + "text": "got", + "start": 64.82, + "end": 64.86, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 64.86, + "end": 64.9, + "confidence": 0.977 + }, + { + "text": "got", + "start": 64.9, + "end": 64.94, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 64.94, + "end": 64.98, + "confidence": 0.976 + }, + { + "text": "got", + "start": 64.98, + "end": 65.02, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 65.02, + "end": 65.06, + "confidence": 0.976 + }, + { + "text": "got", + "start": 65.06, + "end": 65.1, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 65.1, + "end": 65.14, + "confidence": 0.978 + }, + { + "text": "got", + "start": 65.14, + "end": 65.18, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 65.18, + "end": 65.22, + "confidence": 0.978 + }, + { + "text": "got", + "start": 65.22, + "end": 65.26, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 65.26, + "end": 65.3, + "confidence": 0.979 + }, + { + "text": "got", + "start": 65.3, + "end": 
65.34, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 65.34, + "end": 65.38, + "confidence": 0.979 + }, + { + "text": "got", + "start": 65.38, + "end": 65.42, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 65.42, + "end": 65.46, + "confidence": 0.978 + }, + { + "text": "got", + "start": 65.46, + "end": 65.5, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 65.5, + "end": 65.54, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.54, + "end": 65.58, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 65.58, + "end": 65.62, + "confidence": 0.979 + }, + { + "text": "got", + "start": 65.62, + "end": 65.66, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 65.66, + "end": 65.7, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.7, + "end": 65.74, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 65.74, + "end": 65.78, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.78, + "end": 65.82, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 65.82, + "end": 65.86, + "confidence": 0.981 + }, + { + "text": "got", + "start": 65.86, + "end": 65.9, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 65.9, + "end": 65.94, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.94, + "end": 65.98, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 65.98, + "end": 66.02, + "confidence": 0.982 + }, + { + "text": "got", + "start": 66.02, + "end": 66.3, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 66.3, + "end": 66.34, + "confidence": 0.982 + }, + { + "text": "got", + "start": 66.34, + "end": 66.54, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 66.54, + "end": 66.58, + "confidence": 0.982 + }, + { + "text": "got", + "start": 66.58, + "end": 66.62, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 66.62, + "end": 66.66, + "confidence": 0.981 + }, + { + "text": "got", + "start": 66.66, + "end": 67.38, + 
"confidence": 0.989 + }, + { + "text": "them,", + "start": 67.38, + "end": 67.42, + "confidence": 0.982 + }, + { + "text": "got", + "start": 67.42, + "end": 67.46, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 67.46, + "end": 67.5, + "confidence": 0.982 + }, + { + "text": "got", + "start": 67.5, + "end": 67.54, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 67.54, + "end": 67.72, + "confidence": 0.983 + }, + { + "text": "got", + "start": 67.72, + "end": 67.76, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 67.76, + "end": 67.8, + "confidence": 0.984 + }, + { + "text": "got", + "start": 67.8, + "end": 68.9, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 68.9, + "end": 69.8, + "confidence": 0.983 + }, + { + "text": "got", + "start": 69.8, + "end": 70.66, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 70.66, + "end": 70.94, + "confidence": 0.985 + }, + { + "text": "got", + "start": 70.94, + "end": 71.08, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 71.08, + "end": 72.02, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.02, + "end": 72.08, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 72.08, + "end": 72.84, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.84, + "end": 77.32, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 77.32, + "end": 79.8, + "confidence": 0.985 + }, + { + "text": "got", + "start": 79.8, + "end": 79.84, + "confidence": 0.992 + }, + { + "text": "them", + "start": 79.84, + "end": 84.6, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.24, + "end": 91.16, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got 
them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.04965524716227578, + 
"compression_ratio": 29.52, + "no_speech_prob": 0.6971923112869263, + "confidence": 0.851, + "words": [ + { + "text": "got", + "start": 85.24, + "end": 85.28, + "confidence": 0.0 + }, + { + "text": "them,", + "start": 85.28, + "end": 85.32, + "confidence": 0.024 + }, + { + "text": "got", + "start": 85.32, + "end": 85.36, + "confidence": 0.839 + }, + { + "text": "them,", + "start": 85.36, + "end": 85.4, + "confidence": 0.564 + }, + { + "text": "got", + "start": 85.4, + "end": 85.44, + "confidence": 0.755 + }, + { + "text": "them,", + "start": 85.44, + "end": 85.48, + "confidence": 0.442 + }, + { + "text": "got", + "start": 85.48, + "end": 85.52, + "confidence": 0.859 + }, + { + "text": "them,", + "start": 85.52, + "end": 85.56, + "confidence": 0.389 + }, + { + "text": "got", + "start": 85.56, + "end": 85.6, + "confidence": 0.895 + }, + { + "text": "them,", + "start": 85.6, + "end": 85.64, + "confidence": 0.376 + }, + { + "text": "got", + "start": 85.64, + "end": 85.68, + "confidence": 0.896 + }, + { + "text": "them,", + "start": 85.68, + "end": 85.72, + "confidence": 0.39 + }, + { + "text": "got", + "start": 85.72, + "end": 85.76, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 85.76, + "end": 85.8, + "confidence": 0.468 + }, + { + "text": "got", + "start": 85.8, + "end": 85.84, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 85.84, + "end": 85.88, + "confidence": 0.576 + }, + { + "text": "got", + "start": 85.88, + "end": 85.92, + "confidence": 0.914 + }, + { + "text": "them,", + "start": 85.92, + "end": 85.96, + "confidence": 0.645 + }, + { + "text": "got", + "start": 85.96, + "end": 86.0, + "confidence": 0.633 + }, + { + "text": "them,", + "start": 86.0, + "end": 86.04, + "confidence": 0.746 + }, + { + "text": "got", + "start": 86.04, + "end": 86.08, + "confidence": 0.829 + }, + { + "text": "them,", + "start": 86.08, + "end": 86.12, + "confidence": 0.842 + }, + { + "text": "got", + "start": 86.12, + "end": 86.16, + "confidence": 0.906 + 
}, + { + "text": "them,", + "start": 86.16, + "end": 86.2, + "confidence": 0.89 + }, + { + "text": "got", + "start": 86.2, + "end": 86.24, + "confidence": 0.924 + }, + { + "text": "them,", + "start": 86.24, + "end": 86.28, + "confidence": 0.916 + }, + { + "text": "got", + "start": 86.28, + "end": 86.32, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 86.32, + "end": 86.36, + "confidence": 0.934 + }, + { + "text": "got", + "start": 86.36, + "end": 86.4, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 86.4, + "end": 86.44, + "confidence": 0.946 + }, + { + "text": "got", + "start": 86.44, + "end": 86.48, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 86.48, + "end": 86.52, + "confidence": 0.946 + }, + { + "text": "got", + "start": 86.52, + "end": 86.56, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 86.56, + "end": 86.6, + "confidence": 0.946 + }, + { + "text": "got", + "start": 86.6, + "end": 86.64, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 86.64, + "end": 86.68, + "confidence": 0.948 + }, + { + "text": "got", + "start": 86.68, + "end": 86.72, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 86.72, + "end": 86.76, + "confidence": 0.953 + }, + { + "text": "got", + "start": 86.76, + "end": 86.8, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 86.8, + "end": 86.84, + "confidence": 0.956 + }, + { + "text": "got", + "start": 86.84, + "end": 86.88, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 86.88, + "end": 86.92, + "confidence": 0.961 + }, + { + "text": "got", + "start": 86.92, + "end": 86.96, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 86.96, + "end": 87.0, + "confidence": 0.964 + }, + { + "text": "got", + "start": 87.0, + "end": 87.04, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 87.04, + "end": 87.08, + "confidence": 0.967 + }, + { + "text": "got", + "start": 87.08, + "end": 87.12, + "confidence": 0.948 + }, + { + "text": 
"them,", + "start": 87.12, + "end": 87.16, + "confidence": 0.969 + }, + { + "text": "got", + "start": 87.16, + "end": 87.2, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 87.2, + "end": 87.24, + "confidence": 0.972 + }, + { + "text": "got", + "start": 87.24, + "end": 87.28, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 87.28, + "end": 87.32, + "confidence": 0.976 + }, + { + "text": "got", + "start": 87.32, + "end": 87.36, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 87.36, + "end": 87.4, + "confidence": 0.976 + }, + { + "text": "got", + "start": 87.4, + "end": 87.44, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 87.44, + "end": 87.48, + "confidence": 0.977 + }, + { + "text": "got", + "start": 87.48, + "end": 87.52, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 87.52, + "end": 87.56, + "confidence": 0.978 + }, + { + "text": "got", + "start": 87.56, + "end": 87.6, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 87.6, + "end": 87.64, + "confidence": 0.98 + }, + { + "text": "got", + "start": 87.64, + "end": 87.68, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 87.68, + "end": 87.72, + "confidence": 0.98 + }, + { + "text": "got", + "start": 87.72, + "end": 87.76, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 87.76, + "end": 87.8, + "confidence": 0.981 + }, + { + "text": "got", + "start": 87.8, + "end": 87.84, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 87.84, + "end": 87.88, + "confidence": 0.982 + }, + { + "text": "got", + "start": 87.88, + "end": 87.92, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 87.92, + "end": 87.96, + "confidence": 0.983 + }, + { + "text": "got", + "start": 87.96, + "end": 88.0, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 88.0, + "end": 88.04, + "confidence": 0.983 + }, + { + "text": "got", + "start": 88.04, + "end": 88.08, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 
88.08, + "end": 88.12, + "confidence": 0.983 + }, + { + "text": "got", + "start": 88.12, + "end": 88.16, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 88.16, + "end": 88.2, + "confidence": 0.984 + }, + { + "text": "got", + "start": 88.2, + "end": 88.24, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 88.24, + "end": 88.28, + "confidence": 0.984 + }, + { + "text": "got", + "start": 88.28, + "end": 88.32, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 88.32, + "end": 88.36, + "confidence": 0.986 + }, + { + "text": "got", + "start": 88.36, + "end": 88.4, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 88.4, + "end": 88.44, + "confidence": 0.986 + }, + { + "text": "got", + "start": 88.44, + "end": 88.48, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 88.48, + "end": 88.52, + "confidence": 0.986 + }, + { + "text": "got", + "start": 88.52, + "end": 88.56, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 88.56, + "end": 88.6, + "confidence": 0.987 + }, + { + "text": "got", + "start": 88.6, + "end": 88.64, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 88.64, + "end": 88.68, + "confidence": 0.987 + }, + { + "text": "got", + "start": 88.68, + "end": 88.72, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 88.72, + "end": 88.76, + "confidence": 0.988 + }, + { + "text": "got", + "start": 88.76, + "end": 88.8, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 88.8, + "end": 88.84, + "confidence": 0.988 + }, + { + "text": "got", + "start": 88.84, + "end": 88.88, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 88.88, + "end": 88.92, + "confidence": 0.989 + }, + { + "text": "got", + "start": 88.92, + "end": 88.96, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 88.96, + "end": 89.0, + "confidence": 0.989 + }, + { + "text": "got", + "start": 89.0, + "end": 89.04, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 89.04, + "end": 
89.08, + "confidence": 0.989 + }, + { + "text": "got", + "start": 89.08, + "end": 89.12, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 89.12, + "end": 89.16, + "confidence": 0.989 + }, + { + "text": "got", + "start": 89.16, + "end": 89.2, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 89.2, + "end": 89.24, + "confidence": 0.99 + }, + { + "text": "got", + "start": 89.24, + "end": 89.28, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 89.28, + "end": 89.32, + "confidence": 0.991 + }, + { + "text": "got", + "start": 89.32, + "end": 89.36, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 89.36, + "end": 89.4, + "confidence": 0.991 + }, + { + "text": "got", + "start": 89.4, + "end": 89.44, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 89.44, + "end": 89.48, + "confidence": 0.991 + }, + { + "text": "got", + "start": 89.48, + "end": 89.52, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 89.52, + "end": 89.56, + "confidence": 0.991 + }, + { + "text": "got", + "start": 89.56, + "end": 89.6, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 89.6, + "end": 89.64, + "confidence": 0.992 + }, + { + "text": "got", + "start": 89.64, + "end": 89.68, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 89.68, + "end": 89.72, + "confidence": 0.992 + }, + { + "text": "got", + "start": 89.72, + "end": 89.76, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 89.76, + "end": 89.8, + "confidence": 0.992 + }, + { + "text": "got", + "start": 89.8, + "end": 89.84, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 89.84, + "end": 89.88, + "confidence": 0.992 + }, + { + "text": "got", + "start": 89.88, + "end": 89.92, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 89.92, + "end": 89.96, + "confidence": 0.993 + }, + { + "text": "got", + "start": 89.96, + "end": 90.0, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 90.0, + "end": 90.04, + 
"confidence": 0.993 + }, + { + "text": "got", + "start": 90.04, + "end": 90.08, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.08, + "end": 90.12, + "confidence": 0.993 + }, + { + "text": "got", + "start": 90.12, + "end": 90.16, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.16, + "end": 90.2, + "confidence": 0.993 + }, + { + "text": "got", + "start": 90.2, + "end": 90.24, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.24, + "end": 90.28, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.28, + "end": 90.32, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.32, + "end": 90.36, + "confidence": 0.993 + }, + { + "text": "got", + "start": 90.36, + "end": 90.4, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.4, + "end": 90.44, + "confidence": 0.993 + }, + { + "text": "got", + "start": 90.44, + "end": 90.48, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.48, + "end": 90.52, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.52, + "end": 90.56, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.56, + "end": 90.6, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.6, + "end": 90.64, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.64, + "end": 90.68, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.68, + "end": 90.72, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.72, + "end": 90.76, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.76, + "end": 90.8, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.8, + "end": 90.84, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.84, + "end": 90.88, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.88, + "end": 90.92, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.92, + "end": 90.96, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.96, + "end": 91.0, + "confidence": 
0.994 + }, + { + "text": "got", + "start": 91.0, + "end": 91.04, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.04, + "end": 91.08, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.08, + "end": 91.12, + "confidence": 0.995 + }, + { + "text": "them", + "start": 91.12, + "end": 91.16, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive.cuda/accurate_apollo11.mp3.words.json b/tests/expected/naive.cuda/accurate_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..5541228558b3a625658b695ccc9b63cdd745f928 --- /dev/null +++ b/tests/expected/naive.cuda/accurate_apollo11.mp3.words.json @@ -0,0 +1,2438 @@ +{ + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A. All right. Okay. Yeah, sir. Yeah, sir. Let's take that camera. Let's say it makes it want to go on the helmet we were going to have in B-1. The other one. And you can put the other one on the mic helmet. We'll show it to you in a quick screen. Over. All right. Yeah, sir. All right. Yeah, sir. There's a better helmet than B-1. That's the other one. Nice. Let's go in there. At least we're safe. We've got them in there. Helmet bags. And, uh, I guess we have helmets in the helmet bag. At least it's in the helmet bag. Right here. Right here. Yeah, we're taking it next day out of the field up. Yeah, we were. You want to hack me on that? Yeah, we were. You want to hack me on that? With the cover, I tried it already. Okay, fine. We weren't sure of that. Just a suggestion. We thought we'd, uh, say you could check it out. It's not much of an order to turn that. So, uh, I guess we're going to come up with this. Let us know. Okay. No problem. Okay. No problem. No problem. No problem. No problem. No problem. No problem. 
No problem.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.76, + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 11, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 44, + 12, + 36, + 12, + 38, + 12, + 36, + 12, + 32, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.578, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.425 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.52, + "confidence": 0.842 + }, + { + "text": "Houston,", + "start": 1.52, + "end": 1.94, + "confidence": 0.665 + }, + { + "text": "we", + "start": 1.94, + "end": 1.98, + "confidence": 0.983 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.743 + }, + { + "text": "a", + "start": 2.16, + "end": 2.32, + "confidence": 0.993 + }, + { + "text": "recommendation", + "start": 2.32, + "end": 3.08, + "confidence": 0.977 + }, + { + "text": "for", + "start": 3.08, + "end": 3.5, + "confidence": 0.94 + }, + { + "text": "you", + "start": 3.5, + "end": 3.72, + "confidence": 0.993 + }, + { + "text": "on", + "start": 3.72, + "end": 4.26, + "confidence": 0.918 + }, + { + "text": "your", + "start": 4.26, + "end": 4.3, + "confidence": 0.976 + }, + { + "text": "Soyuz-EA", + "start": 4.3, + "end": 5.36, + "confidence": 0.455 + }, + { + "text": "GLEM-E-G-E-A.", + "start": 5.36, + "end": 6.76, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.82, + "end": 10.9, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.149, + "words": [ + { + "text": "All", + "start": 
10.82, + "end": 10.86, + "confidence": 0.006 + }, + { + "text": "right.", + "start": 10.86, + "end": 10.9, + "confidence": 0.72 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.02, + "end": 12.06, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.168, + "words": [ + { + "text": "Okay.", + "start": 12.02, + "end": 12.06, + "confidence": 0.168 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 12.08, + "end": 13.44, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.131, + "words": [ + { + "text": "Yeah,", + "start": 12.08, + "end": 12.68, + "confidence": 0.079 + }, + { + "text": "sir.", + "start": 12.68, + "end": 13.44, + "confidence": 0.218 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.66, + "end": 14.28, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.047, + "words": [ + { + "text": "Yeah,", + "start": 13.66, + "end": 13.82, + "confidence": 0.11 + }, + { + "text": "sir.", + "start": 13.82, + "end": 14.28, + "confidence": 0.02 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 14.3, + "end": 14.98, + "text": " Let's take that camera.", + "tokens": [ + 961, + 311, + 747, + 300, + 2799, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.033, + "words": [ + { + "text": "Let's", + "start": 14.3, + "end": 14.54, + "confidence": 0.036 + }, + { + "text": "take", + "start": 14.54, + "end": 14.58, + "confidence": 0.026 + }, 
+ { + "text": "that", + "start": 14.58, + "end": 14.62, + "confidence": 0.031 + }, + { + "text": "camera.", + "start": 14.62, + "end": 14.98, + "confidence": 0.034 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 15.0, + "end": 19.22, + "text": " Let's say it makes it want to go on the helmet we were going to have in B-1.", + "tokens": [ + 961, + 311, + 584, + 309, + 1669, + 309, + 528, + 281, + 352, + 322, + 264, + 15922, + 321, + 645, + 516, + 281, + 362, + 294, + 363, + 12, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.299, + "words": [ + { + "text": "Let's", + "start": 15.0, + "end": 15.38, + "confidence": 0.273 + }, + { + "text": "say", + "start": 15.38, + "end": 15.58, + "confidence": 0.349 + }, + { + "text": "it", + "start": 15.58, + "end": 15.76, + "confidence": 0.01 + }, + { + "text": "makes", + "start": 15.76, + "end": 15.8, + "confidence": 0.736 + }, + { + "text": "it", + "start": 15.8, + "end": 16.04, + "confidence": 0.921 + }, + { + "text": "want", + "start": 16.04, + "end": 16.16, + "confidence": 0.297 + }, + { + "text": "to", + "start": 16.16, + "end": 16.34, + "confidence": 0.97 + }, + { + "text": "go", + "start": 16.34, + "end": 16.44, + "confidence": 0.932 + }, + { + "text": "on", + "start": 16.44, + "end": 16.74, + "confidence": 0.912 + }, + { + "text": "the", + "start": 16.74, + "end": 16.86, + "confidence": 0.496 + }, + { + "text": "helmet", + "start": 16.86, + "end": 17.34, + "confidence": 0.005 + }, + { + "text": "we", + "start": 17.34, + "end": 17.64, + "confidence": 0.01 + }, + { + "text": "were", + "start": 17.64, + "end": 17.86, + "confidence": 0.08 + }, + { + "text": "going", + "start": 17.86, + "end": 18.04, + "confidence": 0.658 + }, + { + "text": "to", + "start": 18.04, + "end": 18.2, + "confidence": 0.966 + }, + { + "text": "have", + "start": 18.2, + "end": 18.26, + "confidence": 0.957 + }, + { + 
"text": "in", + "start": 18.26, + "end": 18.64, + "confidence": 0.863 + }, + { + "text": "B-1.", + "start": 18.64, + "end": 19.22, + "confidence": 0.506 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.38, + "end": 19.98, + "text": " The other one.", + "tokens": [ + 440, + 661, + 472, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.13, + "words": [ + { + "text": "The", + "start": 19.38, + "end": 19.42, + "confidence": 0.005 + }, + { + "text": "other", + "start": 19.42, + "end": 19.68, + "confidence": 0.186 + }, + { + "text": "one.", + "start": 19.68, + "end": 19.98, + "confidence": 0.586 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 20.12, + "end": 22.9, + "text": " And you can put the other one on the mic helmet.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.366, + "words": [ + { + "text": "And", + "start": 20.12, + "end": 20.26, + "confidence": 0.343 + }, + { + "text": "you", + "start": 20.26, + "end": 20.38, + "confidence": 0.89 + }, + { + "text": "can", + "start": 20.38, + "end": 20.54, + "confidence": 0.087 + }, + { + "text": "put", + "start": 20.54, + "end": 20.7, + "confidence": 0.901 + }, + { + "text": "the", + "start": 20.7, + "end": 20.86, + "confidence": 0.979 + }, + { + "text": "other", + "start": 20.86, + "end": 21.06, + "confidence": 0.997 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.973 + }, + { + "text": "on", + "start": 21.26, + "end": 21.9, + "confidence": 0.968 + }, + { + "text": "the", + "start": 21.9, + "end": 21.94, + "confidence": 0.09 + }, + { + "text": "mic", + "start": 21.94, + "end": 22.58, + "confidence": 0.682 + }, + { + "text": "helmet.", + "start": 
22.58, + "end": 22.9, + "confidence": 0.066 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 22.92, + "end": 24.8, + "text": " We'll show it to you in a quick screen.", + "tokens": [ + 492, + 603, + 855, + 309, + 281, + 291, + 294, + 257, + 1702, + 2568, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.122, + "words": [ + { + "text": "We'll", + "start": 22.92, + "end": 23.22, + "confidence": 0.08 + }, + { + "text": "show", + "start": 23.22, + "end": 23.32, + "confidence": 0.011 + }, + { + "text": "it", + "start": 23.32, + "end": 23.54, + "confidence": 0.15 + }, + { + "text": "to", + "start": 23.54, + "end": 23.64, + "confidence": 0.255 + }, + { + "text": "you", + "start": 23.64, + "end": 23.78, + "confidence": 0.703 + }, + { + "text": "in", + "start": 23.78, + "end": 23.96, + "confidence": 0.231 + }, + { + "text": "a", + "start": 23.96, + "end": 24.12, + "confidence": 0.736 + }, + { + "text": "quick", + "start": 24.12, + "end": 24.16, + "confidence": 0.018 + }, + { + "text": "screen.", + "start": 24.16, + "end": 24.8, + "confidence": 0.122 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 24.94, + "end": 25.26, + "text": " Over.", + "tokens": [ + 4886, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.125, + "words": [ + { + "text": "Over.", + "start": 24.94, + "end": 25.26, + "confidence": 0.125 + } + ] + }, + { + "id": 11, + "seek": 2600, + "start": 25.52, + "end": 27.52, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.108, + "words": [ + { + "text": "All", + "start": 25.52, + "end": 26.14, + "confidence": 0.003 + }, + { + "text": "right.", + 
"start": 26.14, + "end": 27.52, + "confidence": 0.677 + } + ] + }, + { + "id": 12, + "seek": 2600, + "start": 31.32, + "end": 32.48, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.045, + "words": [ + { + "text": "Yeah,", + "start": 31.32, + "end": 31.56, + "confidence": 0.033 + }, + { + "text": "sir.", + "start": 31.56, + "end": 32.48, + "confidence": 0.062 + } + ] + }, + { + "id": 13, + "seek": 2600, + "start": 32.66, + "end": 33.06, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.223, + "words": [ + { + "text": "All", + "start": 32.66, + "end": 32.94, + "confidence": 0.047 + }, + { + "text": "right.", + "start": 32.94, + "end": 33.06, + "confidence": 0.488 + } + ] + }, + { + "id": 14, + "seek": 2600, + "start": 33.08, + "end": 34.2, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.035, + "words": [ + { + "text": "Yeah,", + "start": 33.08, + "end": 33.36, + "confidence": 0.144 + }, + { + "text": "sir.", + "start": 33.36, + "end": 34.2, + "confidence": 0.009 + } + ] + }, + { + "id": 15, + "seek": 2600, + "start": 34.52, + "end": 35.4, + "text": " There's a better helmet than B-1.", + "tokens": [ + 821, + 311, + 257, + 1101, + 15922, + 813, + 363, + 12, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.014, + "words": [ + { + "text": "There's", + "start": 34.52, + "end": 34.56, + "confidence": 0.046 + 
}, + { + "text": "a", + "start": 34.56, + "end": 34.64, + "confidence": 0.037 + }, + { + "text": "better", + "start": 34.64, + "end": 34.94, + "confidence": 0.0 + }, + { + "text": "helmet", + "start": 34.94, + "end": 34.98, + "confidence": 0.0 + }, + { + "text": "than", + "start": 34.98, + "end": 35.02, + "confidence": 0.179 + }, + { + "text": "B-1.", + "start": 35.02, + "end": 35.4, + "confidence": 0.033 + } + ] + }, + { + "id": 16, + "seek": 2600, + "start": 35.42, + "end": 35.94, + "text": " That's the other one.", + "tokens": [ + 663, + 311, + 264, + 661, + 472, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.162, + "words": [ + { + "text": "That's", + "start": 35.42, + "end": 35.48, + "confidence": 0.082 + }, + { + "text": "the", + "start": 35.48, + "end": 35.52, + "confidence": 0.167 + }, + { + "text": "other", + "start": 35.52, + "end": 35.68, + "confidence": 0.412 + }, + { + "text": "one.", + "start": 35.68, + "end": 35.94, + "confidence": 0.197 + } + ] + }, + { + "id": 17, + "seek": 2600, + "start": 35.96, + "end": 36.34, + "text": " Nice.", + "tokens": [ + 5490, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.007, + "words": [ + { + "text": "Nice.", + "start": 35.96, + "end": 36.34, + "confidence": 0.007 + } + ] + }, + { + "id": 18, + "seek": 2600, + "start": 37.68, + "end": 38.48, + "text": " Let's go in there.", + "tokens": [ + 961, + 311, + 352, + 294, + 456, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.125, + "words": [ + { + "text": "Let's", + "start": 37.68, + "end": 38.16, + "confidence": 0.022 + }, + { + "text": "go", + "start": 38.16, + "end": 38.2, + "confidence": 0.626 + }, 
+ { + "text": "in", + "start": 38.2, + "end": 38.24, + "confidence": 0.037 + }, + { + "text": "there.", + "start": 38.24, + "end": 38.48, + "confidence": 0.579 + } + ] + }, + { + "id": 19, + "seek": 2600, + "start": 38.6, + "end": 39.24, + "text": " At least we're safe.", + "tokens": [ + 1711, + 1935, + 321, + 434, + 3273, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.125, + "words": [ + { + "text": "At", + "start": 38.6, + "end": 38.64, + "confidence": 0.001 + }, + { + "text": "least", + "start": 38.64, + "end": 38.86, + "confidence": 0.311 + }, + { + "text": "we're", + "start": 38.86, + "end": 39.08, + "confidence": 0.12 + }, + { + "text": "safe.", + "start": 39.08, + "end": 39.24, + "confidence": 0.799 + } + ] + }, + { + "id": 20, + "seek": 2600, + "start": 39.94, + "end": 40.54, + "text": " We've got them in there.", + "tokens": [ + 492, + 600, + 658, + 552, + 294, + 456, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.076, + "words": [ + { + "text": "We've", + "start": 39.94, + "end": 40.38, + "confidence": 0.06 + }, + { + "text": "got", + "start": 40.38, + "end": 40.42, + "confidence": 0.737 + }, + { + "text": "them", + "start": 40.42, + "end": 40.46, + "confidence": 0.004 + }, + { + "text": "in", + "start": 40.46, + "end": 40.5, + "confidence": 0.039 + }, + { + "text": "there.", + "start": 40.5, + "end": 40.54, + "confidence": 0.198 + } + ] + }, + { + "id": 21, + "seek": 2600, + "start": 40.54, + "end": 41.28, + "text": " Helmet bags.", + "tokens": [ + 6128, + 5537, + 10405, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.001, + "words": [ + { + "text": "Helmet", + "start": 40.54, + 
"end": 40.78, + "confidence": 0.001 + }, + { + "text": "bags.", + "start": 40.78, + "end": 41.28, + "confidence": 0.002 + } + ] + }, + { + "id": 22, + "seek": 2600, + "start": 43.52, + "end": 47.74, + "text": " And, uh, I guess we have helmets in the helmet bag.", + "tokens": [ + 400, + 11, + 2232, + 11, + 286, + 2041, + 321, + 362, + 42022, + 294, + 264, + 15922, + 3411, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.208, + "words": [ + { + "text": "And,", + "start": 43.52, + "end": 43.56, + "confidence": 0.087 + }, + { + "text": "uh,", + "start": 43.56, + "end": 44.02, + "confidence": 0.767 + }, + { + "text": "I", + "start": 44.02, + "end": 44.06, + "confidence": 0.321 + }, + { + "text": "guess", + "start": 44.06, + "end": 44.32, + "confidence": 0.236 + }, + { + "text": "we", + "start": 44.32, + "end": 45.0, + "confidence": 0.078 + }, + { + "text": "have", + "start": 45.0, + "end": 45.6, + "confidence": 0.06 + }, + { + "text": "helmets", + "start": 45.6, + "end": 46.62, + "confidence": 0.004 + }, + { + "text": "in", + "start": 46.62, + "end": 46.82, + "confidence": 0.6 + }, + { + "text": "the", + "start": 46.82, + "end": 46.98, + "confidence": 0.614 + }, + { + "text": "helmet", + "start": 46.98, + "end": 47.2, + "confidence": 0.823 + }, + { + "text": "bag.", + "start": 47.2, + "end": 47.74, + "confidence": 0.363 + } + ] + }, + { + "id": 23, + "seek": 2600, + "start": 48.24, + "end": 49.84, + "text": " At least it's in the helmet bag.", + "tokens": [ + 1711, + 1935, + 309, + 311, + 294, + 264, + 15922, + 3411, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.112, + "words": [ + { + "text": "At", + "start": 48.24, + "end": 48.38, + "confidence": 0.064 + }, + { + "text": "least", + "start": 48.38, + "end": 48.66, + 
"confidence": 0.795 + }, + { + "text": "it's", + "start": 48.66, + "end": 49.02, + "confidence": 0.347 + }, + { + "text": "in", + "start": 49.02, + "end": 49.06, + "confidence": 0.172 + }, + { + "text": "the", + "start": 49.06, + "end": 49.1, + "confidence": 0.242 + }, + { + "text": "helmet", + "start": 49.1, + "end": 49.22, + "confidence": 0.003 + }, + { + "text": "bag.", + "start": 49.22, + "end": 49.84, + "confidence": 0.062 + } + ] + }, + { + "id": 24, + "seek": 2600, + "start": 50.22, + "end": 50.94, + "text": " Right here.", + "tokens": [ + 1779, + 510, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.099, + "words": [ + { + "text": "Right", + "start": 50.22, + "end": 50.48, + "confidence": 0.006 + }, + { + "text": "here.", + "start": 50.48, + "end": 50.94, + "confidence": 0.416 + } + ] + }, + { + "id": 25, + "seek": 2600, + "start": 51.54, + "end": 52.24, + "text": " Right here.", + "tokens": [ + 1779, + 510, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.04, + "words": [ + { + "text": "Right", + "start": 51.54, + "end": 51.8, + "confidence": 0.008 + }, + { + "text": "here.", + "start": 51.8, + "end": 52.24, + "confidence": 0.091 + } + ] + }, + { + "id": 26, + "seek": 2600, + "start": 53.2, + "end": 55.42, + "text": " Yeah, we're taking it next day out of the field up.", + "tokens": [ + 865, + 11, + 321, + 434, + 1940, + 309, + 958, + 786, + 484, + 295, + 264, + 2519, + 493, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.367, + "words": [ + { + "text": "Yeah,", + "start": 53.2, + "end": 53.38, + "confidence": 0.47 + }, + { + "text": "we're", + "start": 53.38, + "end": 53.7, + "confidence": 
0.742 + }, + { + "text": "taking", + "start": 53.7, + "end": 53.74, + "confidence": 0.322 + }, + { + "text": "it", + "start": 53.74, + "end": 53.86, + "confidence": 0.082 + }, + { + "text": "next", + "start": 53.86, + "end": 53.96, + "confidence": 0.545 + }, + { + "text": "day", + "start": 53.96, + "end": 54.16, + "confidence": 0.817 + }, + { + "text": "out", + "start": 54.16, + "end": 54.32, + "confidence": 0.29 + }, + { + "text": "of", + "start": 54.32, + "end": 54.54, + "confidence": 0.351 + }, + { + "text": "the", + "start": 54.54, + "end": 54.68, + "confidence": 0.432 + }, + { + "text": "field", + "start": 54.68, + "end": 55.2, + "confidence": 0.233 + }, + { + "text": "up.", + "start": 55.2, + "end": 55.42, + "confidence": 0.235 + } + ] + }, + { + "id": 27, + "seek": 5500, + "start": 55.44, + "end": 56.12, + "text": " Yeah, we were.", + "tokens": [ + 865, + 11, + 321, + 645, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.02, + "words": [ + { + "text": "Yeah,", + "start": 55.44, + "end": 55.48, + "confidence": 0.024 + }, + { + "text": "we", + "start": 55.48, + "end": 55.52, + "confidence": 0.022 + }, + { + "text": "were.", + "start": 55.52, + "end": 56.12, + "confidence": 0.017 + } + ] + }, + { + "id": 28, + "seek": 5500, + "start": 56.5, + "end": 57.48, + "text": " You want to hack me on that?", + "tokens": [ + 509, + 528, + 281, + 10339, + 385, + 322, + 300, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.019, + "words": [ + { + "text": "You", + "start": 56.5, + "end": 56.62, + "confidence": 0.007 + }, + { + "text": "want", + "start": 56.62, + "end": 57.0, + "confidence": 0.01 + }, + { + "text": "to", + "start": 57.0, + "end": 57.22, + "confidence": 0.257 + }, + { + "text": "hack", + "start": 57.22, + 
"end": 57.36, + "confidence": 0.0 + }, + { + "text": "me", + "start": 57.36, + "end": 57.4, + "confidence": 0.054 + }, + { + "text": "on", + "start": 57.4, + "end": 57.44, + "confidence": 0.005 + }, + { + "text": "that?", + "start": 57.44, + "end": 57.48, + "confidence": 0.22 + } + ] + }, + { + "id": 29, + "seek": 5500, + "start": 57.48, + "end": 58.26, + "text": " Yeah, we were.", + "tokens": [ + 865, + 11, + 321, + 645, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.047, + "words": [ + { + "text": "Yeah,", + "start": 57.48, + "end": 57.96, + "confidence": 0.029 + }, + { + "text": "we", + "start": 57.96, + "end": 58.06, + "confidence": 0.096 + }, + { + "text": "were.", + "start": 58.06, + "end": 58.26, + "confidence": 0.053 + } + ] + }, + { + "id": 30, + "seek": 5500, + "start": 58.28, + "end": 59.48, + "text": " You want to hack me on that?", + "tokens": [ + 509, + 528, + 281, + 10339, + 385, + 322, + 300, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.055, + "words": [ + { + "text": "You", + "start": 58.28, + "end": 58.46, + "confidence": 0.029 + }, + { + "text": "want", + "start": 58.46, + "end": 58.6, + "confidence": 0.107 + }, + { + "text": "to", + "start": 58.6, + "end": 58.64, + "confidence": 0.376 + }, + { + "text": "hack", + "start": 58.64, + "end": 58.76, + "confidence": 0.0 + }, + { + "text": "me", + "start": 58.76, + "end": 58.96, + "confidence": 0.442 + }, + { + "text": "on", + "start": 58.96, + "end": 59.18, + "confidence": 0.266 + }, + { + "text": "that?", + "start": 59.18, + "end": 59.48, + "confidence": 0.186 + } + ] + }, + { + "id": 31, + "seek": 5500, + "start": 59.5, + "end": 61.96, + "text": " With the cover, I tried it already.", + "tokens": [ + 2022, + 264, + 2060, + 11, + 286, + 3031, + 
309, + 1217, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.202, + "words": [ + { + "text": "With", + "start": 59.5, + "end": 60.1, + "confidence": 0.012 + }, + { + "text": "the", + "start": 60.1, + "end": 60.82, + "confidence": 0.238 + }, + { + "text": "cover,", + "start": 60.82, + "end": 61.3, + "confidence": 0.165 + }, + { + "text": "I", + "start": 61.3, + "end": 61.34, + "confidence": 0.826 + }, + { + "text": "tried", + "start": 61.34, + "end": 61.52, + "confidence": 0.019 + }, + { + "text": "it", + "start": 61.52, + "end": 61.72, + "confidence": 0.699 + }, + { + "text": "already.", + "start": 61.72, + "end": 61.96, + "confidence": 0.832 + } + ] + }, + { + "id": 32, + "seek": 5500, + "start": 62.56, + "end": 63.16, + "text": " Okay, fine.", + "tokens": [ + 1033, + 11, + 2489, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.542, + "words": [ + { + "text": "Okay,", + "start": 62.56, + "end": 62.98, + "confidence": 0.49 + }, + { + "text": "fine.", + "start": 62.98, + "end": 63.16, + "confidence": 0.601 + } + ] + }, + { + "id": 33, + "seek": 5500, + "start": 63.2, + "end": 64.16, + "text": " We weren't sure of that.", + "tokens": [ + 492, + 4999, + 380, + 988, + 295, + 300, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.75, + "words": [ + { + "text": "We", + "start": 63.2, + "end": 63.36, + "confidence": 0.468 + }, + { + "text": "weren't", + "start": 63.36, + "end": 63.6, + "confidence": 0.939 + }, + { + "text": "sure", + "start": 63.6, + "end": 63.8, + "confidence": 0.916 + }, + { + "text": "of", + "start": 63.8, + "end": 63.88, + "confidence": 0.594 + }, + { + "text": "that.", + 
"start": 63.88, + "end": 64.16, + "confidence": 0.771 + } + ] + }, + { + "id": 34, + "seek": 5500, + "start": 64.48, + "end": 65.14, + "text": " Just a suggestion.", + "tokens": [ + 1449, + 257, + 16541, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.163, + "words": [ + { + "text": "Just", + "start": 64.48, + "end": 64.64, + "confidence": 0.011 + }, + { + "text": "a", + "start": 64.64, + "end": 64.78, + "confidence": 0.428 + }, + { + "text": "suggestion.", + "start": 64.78, + "end": 65.14, + "confidence": 0.386 + } + ] + }, + { + "id": 35, + "seek": 5500, + "start": 65.16, + "end": 68.04, + "text": " We thought we'd, uh, say you could check it out.", + "tokens": [ + 492, + 1194, + 321, + 1116, + 11, + 2232, + 11, + 584, + 291, + 727, + 1520, + 309, + 484, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.546, + "words": [ + { + "text": "We", + "start": 65.16, + "end": 65.42, + "confidence": 0.164 + }, + { + "text": "thought", + "start": 65.42, + "end": 65.62, + "confidence": 0.835 + }, + { + "text": "we'd,", + "start": 65.62, + "end": 66.14, + "confidence": 0.391 + }, + { + "text": "uh,", + "start": 66.14, + "end": 66.96, + "confidence": 0.678 + }, + { + "text": "say", + "start": 66.96, + "end": 67.0, + "confidence": 0.181 + }, + { + "text": "you", + "start": 67.0, + "end": 67.14, + "confidence": 0.825 + }, + { + "text": "could", + "start": 67.14, + "end": 67.3, + "confidence": 0.715 + }, + { + "text": "check", + "start": 67.3, + "end": 67.44, + "confidence": 0.637 + }, + { + "text": "it", + "start": 67.44, + "end": 67.66, + "confidence": 0.982 + }, + { + "text": "out.", + "start": 67.66, + "end": 68.04, + "confidence": 0.908 + } + ] + }, + { + "id": 36, + "seek": 5500, + "start": 68.24, + "end": 69.46, + "text": " 
It's not much of an order to turn that.", + "tokens": [ + 467, + 311, + 406, + 709, + 295, + 364, + 1668, + 281, + 1261, + 300, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.139, + "words": [ + { + "text": "It's", + "start": 68.24, + "end": 68.46, + "confidence": 0.077 + }, + { + "text": "not", + "start": 68.46, + "end": 68.5, + "confidence": 0.09 + }, + { + "text": "much", + "start": 68.5, + "end": 68.64, + "confidence": 0.342 + }, + { + "text": "of", + "start": 68.64, + "end": 68.78, + "confidence": 0.388 + }, + { + "text": "an", + "start": 68.78, + "end": 68.9, + "confidence": 0.399 + }, + { + "text": "order", + "start": 68.9, + "end": 68.94, + "confidence": 0.018 + }, + { + "text": "to", + "start": 68.94, + "end": 69.14, + "confidence": 0.122 + }, + { + "text": "turn", + "start": 69.14, + "end": 69.18, + "confidence": 0.447 + }, + { + "text": "that.", + "start": 69.18, + "end": 69.46, + "confidence": 0.116 + } + ] + }, + { + "id": 37, + "seek": 5500, + "start": 70.38, + "end": 71.98, + "text": " So, uh, I guess we're going to come up with this.", + "tokens": [ + 407, + 11, + 2232, + 11, + 286, + 2041, + 321, + 434, + 516, + 281, + 808, + 493, + 365, + 341, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.449, + "words": [ + { + "text": "So,", + "start": 70.38, + "end": 70.42, + "confidence": 0.514 + }, + { + "text": "uh,", + "start": 70.42, + "end": 70.5, + "confidence": 0.38 + }, + { + "text": "I", + "start": 70.5, + "end": 70.54, + "confidence": 0.946 + }, + { + "text": "guess", + "start": 70.54, + "end": 70.8, + "confidence": 0.99 + }, + { + "text": "we're", + "start": 70.8, + "end": 71.1, + "confidence": 0.534 + }, + { + "text": "going", + "start": 71.1, + "end": 71.28, + "confidence": 0.106 + }, + { + 
"text": "to", + "start": 71.28, + "end": 71.42, + "confidence": 0.915 + }, + { + "text": "come", + "start": 71.42, + "end": 71.48, + "confidence": 0.48 + }, + { + "text": "up", + "start": 71.48, + "end": 71.68, + "confidence": 0.579 + }, + { + "text": "with", + "start": 71.68, + "end": 71.88, + "confidence": 0.809 + }, + { + "text": "this.", + "start": 71.88, + "end": 71.98, + "confidence": 0.166 + } + ] + }, + { + "id": 38, + "seek": 5500, + "start": 72.0, + "end": 72.8, + "text": " Let us know.", + "tokens": [ + 961, + 505, + 458, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.45, + "words": [ + { + "text": "Let", + "start": 72.0, + "end": 72.14, + "confidence": 0.221 + }, + { + "text": "us", + "start": 72.14, + "end": 72.3, + "confidence": 0.272 + }, + { + "text": "know.", + "start": 72.3, + "end": 72.8, + "confidence": 0.826 + } + ] + }, + { + "id": 39, + "seek": 5500, + "start": 72.82, + "end": 73.02, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.049, + "words": [ + { + "text": "Okay.", + "start": 72.82, + "end": 73.02, + "confidence": 0.049 + } + ] + }, + { + "id": 40, + "seek": 5500, + "start": 74.2, + "end": 75.48, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.077, + "words": [ + { + "text": "No", + "start": 74.2, + "end": 74.24, + "confidence": 0.005 + }, + { + "text": "problem.", + "start": 74.24, + "end": 75.48, + "confidence": 0.295 + } + ] + }, + { + "id": 41, + "seek": 5500, + "start": 75.72, + "end": 75.94, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + 
"temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.263, + "words": [ + { + "text": "Okay.", + "start": 75.72, + "end": 75.94, + "confidence": 0.263 + } + ] + }, + { + "id": 42, + "seek": 5500, + "start": 75.96, + "end": 76.66, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.588, + "words": [ + { + "text": "No", + "start": 75.96, + "end": 76.24, + "confidence": 0.447 + }, + { + "text": "problem.", + "start": 76.24, + "end": 76.66, + "confidence": 0.673 + } + ] + }, + { + "id": 43, + "seek": 5500, + "start": 78.42, + "end": 78.5, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.031, + "words": [ + { + "text": "No", + "start": 78.42, + "end": 78.46, + "confidence": 0.032 + }, + { + "text": "problem.", + "start": 78.46, + "end": 78.5, + "confidence": 0.03 + } + ] + }, + { + "id": 44, + "seek": 5500, + "start": 78.5, + "end": 79.28, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.013, + "words": [ + { + "text": "No", + "start": 78.5, + "end": 78.7, + "confidence": 0.001 + }, + { + "text": "problem.", + "start": 78.7, + "end": 79.28, + "confidence": 0.044 + } + ] + }, + { + "id": 45, + "seek": 5500, + "start": 79.3, + "end": 80.3, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 
0.00039152163662947714, + "confidence": 0.017, + "words": [ + { + "text": "No", + "start": 79.3, + "end": 79.48, + "confidence": 0.002 + }, + { + "text": "problem.", + "start": 79.48, + "end": 80.3, + "confidence": 0.056 + } + ] + }, + { + "id": 46, + "seek": 8000, + "start": 80.32, + "end": 84.94, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3978840112686157, + "compression_ratio": 1.5217391304347827, + "no_speech_prob": 0.0009465877083130181, + "confidence": 0.029, + "words": [ + { + "text": "No", + "start": 80.32, + "end": 80.54, + "confidence": 0.004 + }, + { + "text": "problem.", + "start": 80.54, + "end": 84.94, + "confidence": 0.082 + } + ] + }, + { + "id": 47, + "seek": 8000, + "start": 85.24, + "end": 87.1, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3978840112686157, + "compression_ratio": 1.5217391304347827, + "no_speech_prob": 0.0009465877083130181, + "confidence": 0.03, + "words": [ + { + "text": "No", + "start": 85.24, + "end": 85.4, + "confidence": 0.002 + }, + { + "text": "problem.", + "start": 85.4, + "end": 87.1, + "confidence": 0.128 + } + ] + }, + { + "id": 48, + "seek": 8800, + "start": 87.52, + "end": 88.98, + "text": " No problem.", + "tokens": [ + 50364, + 883, + 1154, + 13, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.649966835975647, + "compression_ratio": 0.5789473684210527, + "no_speech_prob": 0.0005261301412247121, + "confidence": 0.037, + "words": [ + { + "text": "No", + "start": 87.52, + "end": 88.9, + "confidence": 0.003 + }, + { + "text": "problem.", + "start": 88.9, + "end": 88.98, + "confidence": 0.137 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive.cuda/naive_apollo11.mp3.words.json b/tests/expected/naive.cuda/naive_apollo11.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..1d7628fb1df3720ce20a998436c90ebb59cb86c7 --- /dev/null +++ b/tests/expected/naive.cuda/naive_apollo11.mp3.words.json @@ -0,0 +1,3814 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.84, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7235679626464844, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993799924850464, + "confidence": 0.494, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.425 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.52, + "confidence": 0.842 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.78, + "confidence": 0.974 + }, + { + "text": "we", + "start": 1.78, + "end": 1.98, + "confidence": 0.453 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.791 + }, + { + "text": "a", + "start": 2.16, + "end": 2.32, + "confidence": 0.992 + }, + { + "text": "recommendation", + "start": 2.32, + "end": 3.08, + "confidence": 0.97 + }, + { + "text": "for", + "start": 3.08, + "end": 3.5, + "confidence": 0.945 + }, + { + "text": "you", + "start": 3.5, + "end": 3.72, + "confidence": 0.99 + }, + { + "text": "on", + "start": 3.72, + "end": 4.2, + 
"confidence": 0.935 + }, + { + "text": "your", + "start": 4.2, + "end": 4.24, + "confidence": 0.974 + }, + { + "text": "Soyuz-VA", + "start": 4.24, + "end": 5.32, + "confidence": 0.327 + }, + { + "text": "GLEME", + "start": 5.32, + "end": 6.02, + "confidence": 0.178 + }, + { + "text": "GVA.", + "start": 6.02, + "end": 6.84, + "confidence": 0.321 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.82, + "end": 19.28, + "text": " Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7235679626464844, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993799924850464, + "confidence": 0.335, + "words": [ + { + "text": "Alright,", + "start": 10.82, + "end": 12.06, + "confidence": 0.126 + }, + { + "text": "okay,", + "start": 12.06, + "end": 13.06, + "confidence": 0.394 + }, + { + "text": "we", + "start": 13.06, + "end": 13.1, + "confidence": 0.478 + }, + { + "text": "like", + "start": 13.1, + "end": 13.48, + "confidence": 0.493 + }, + { + "text": "to", + "start": 13.48, + "end": 13.66, + "confidence": 0.128 + }, + { + "text": "say", + "start": 13.66, + "end": 14.94, + "confidence": 0.084 + }, + { + "text": "that", + "start": 14.94, + "end": 15.54, + "confidence": 0.225 + }, + { + "text": "they", + "start": 15.54, + "end": 15.58, + "confidence": 0.328 + }, + { + "text": "make", + "start": 15.58, + "end": 15.78, + "confidence": 0.162 + }, + { + "text": "the", + "start": 15.78, + "end": 15.92, + "confidence": 0.226 + }, + { + "text": "one", + "start": 15.92, + "end": 16.1, + "confidence": 0.559 + }, + { + "text": "that's", + "start": 16.1, + "end": 16.36, + "confidence": 0.367 + }, + { + "text": "on", + "start": 16.36, + "end": 16.6, + "confidence": 0.387 + }, + { 
+ "text": "the", + "start": 16.6, + "end": 16.84, + "confidence": 0.354 + }, + { + "text": "helmet", + "start": 16.84, + "end": 17.36, + "confidence": 0.205 + }, + { + "text": "we're", + "start": 17.36, + "end": 17.84, + "confidence": 0.297 + }, + { + "text": "going", + "start": 17.84, + "end": 18.04, + "confidence": 0.432 + }, + { + "text": "to", + "start": 18.04, + "end": 18.18, + "confidence": 0.448 + }, + { + "text": "have", + "start": 18.18, + "end": 18.24, + "confidence": 0.761 + }, + { + "text": "in", + "start": 18.24, + "end": 18.46, + "confidence": 0.743 + }, + { + "text": "B1.", + "start": 18.46, + "end": 19.28, + "confidence": 0.753 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 19.38, + "end": 24.7, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7235679626464844, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993799924850464, + "confidence": 0.123, + "words": [ + { + "text": "And", + "start": 19.38, + "end": 20.14, + "confidence": 0.321 + }, + { + "text": "you", + "start": 20.14, + "end": 20.36, + "confidence": 0.904 + }, + { + "text": "can", + "start": 20.36, + "end": 20.54, + "confidence": 0.363 + }, + { + "text": "put", + "start": 20.54, + "end": 20.72, + "confidence": 0.939 + }, + { + "text": "the", + "start": 20.72, + "end": 20.86, + "confidence": 0.978 + }, + { + "text": "other", + "start": 20.86, + "end": 21.06, + "confidence": 0.995 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.965 + }, + { + "text": "on", + "start": 21.26, + "end": 21.68, + "confidence": 0.961 + }, + { + "text": "the", + "start": 21.68, + "end": 21.74, + "confidence": 0.199 + }, + { + "text": "mic", + "start": 21.74, + "end": 22.58, + "confidence": 0.694 + }, + { + "text": 
"helmet", + "start": 22.58, + "end": 22.94, + "confidence": 0.007 + }, + { + "text": "with", + "start": 22.94, + "end": 23.2, + "confidence": 0.059 + }, + { + "text": "those", + "start": 23.2, + "end": 23.46, + "confidence": 0.046 + }, + { + "text": "GVA", + "start": 23.46, + "end": 23.86, + "confidence": 0.001 + }, + { + "text": "blizzard", + "start": 23.86, + "end": 24.32, + "confidence": 0.032 + }, + { + "text": "frames.", + "start": 24.32, + "end": 24.7, + "confidence": 0.231 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 31.36, + "end": 54.68, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 
11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.10793833414713541, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.0011014570482075214, + "confidence": 0.887, + "words": [ + { + "text": "Alright,", + "start": 31.36, + "end": 31.98, + "confidence": 0.077 + }, + { + "text": "got", + "start": 31.98, + "end": 32.02, + "confidence": 0.169 + }, + { + "text": "them,", + "start": 32.02, + "end": 32.52, + "confidence": 0.189 + }, + { + "text": "got", + "start": 32.52, + "end": 33.0, + "confidence": 0.429 + }, + { + "text": "them,", + "start": 33.0, + "end": 33.78, + "confidence": 0.613 + }, + { + "text": "got", + "start": 33.78, + "end": 33.82, + "confidence": 0.67 + }, + { + "text": "them,", + "start": 33.82, + "end": 34.56, + "confidence": 0.495 + }, + { + "text": "got", + "start": 34.56, + "end": 34.74, + "confidence": 0.664 + }, + { + "text": "them,", + "start": 34.74, + "end": 34.78, + "confidence": 0.486 + }, + { + "text": "got", + "start": 34.78, + "end": 35.02, + "confidence": 0.756 + }, + { + "text": "them,", + "start": 35.02, + "end": 35.06, + "confidence": 0.565 + }, + { + "text": "got", + "start": 35.06, + "end": 35.1, + 
"confidence": 0.781 + }, + { + "text": "them,", + "start": 35.1, + "end": 35.14, + "confidence": 0.684 + }, + { + "text": "got", + "start": 35.14, + "end": 35.32, + "confidence": 0.858 + }, + { + "text": "them,", + "start": 35.32, + "end": 35.98, + "confidence": 0.803 + }, + { + "text": "got", + "start": 35.98, + "end": 36.02, + "confidence": 0.917 + }, + { + "text": "them,", + "start": 36.02, + "end": 36.06, + "confidence": 0.767 + }, + { + "text": "got", + "start": 36.06, + "end": 36.1, + "confidence": 0.584 + }, + { + "text": "them,", + "start": 36.1, + "end": 36.14, + "confidence": 0.843 + }, + { + "text": "got", + "start": 36.14, + "end": 36.18, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 36.18, + "end": 36.22, + "confidence": 0.887 + }, + { + "text": "got", + "start": 36.22, + "end": 36.26, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 36.26, + "end": 36.4, + "confidence": 0.903 + }, + { + "text": "got", + "start": 36.4, + "end": 36.44, + "confidence": 0.939 + }, + { + "text": "them,", + "start": 36.44, + "end": 36.48, + "confidence": 0.909 + }, + { + "text": "got", + "start": 36.48, + "end": 36.52, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 36.52, + "end": 36.56, + "confidence": 0.902 + }, + { + "text": "got", + "start": 36.56, + "end": 36.6, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 36.6, + "end": 36.64, + "confidence": 0.904 + }, + { + "text": "got", + "start": 36.64, + "end": 36.68, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 36.68, + "end": 36.72, + "confidence": 0.911 + }, + { + "text": "got", + "start": 36.72, + "end": 36.76, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 36.76, + "end": 36.8, + "confidence": 0.918 + }, + { + "text": "got", + "start": 36.8, + "end": 36.84, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 36.84, + "end": 36.88, + "confidence": 0.923 + }, + { + "text": "got", + "start": 36.88, + "end": 36.92, + "confidence": 
0.937 + }, + { + "text": "them,", + "start": 36.92, + "end": 36.96, + "confidence": 0.93 + }, + { + "text": "got", + "start": 36.96, + "end": 37.0, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 37.0, + "end": 37.04, + "confidence": 0.936 + }, + { + "text": "got", + "start": 37.04, + "end": 37.08, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 37.08, + "end": 37.12, + "confidence": 0.941 + }, + { + "text": "got", + "start": 37.12, + "end": 37.16, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 37.16, + "end": 37.2, + "confidence": 0.944 + }, + { + "text": "got", + "start": 37.2, + "end": 37.24, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 37.24, + "end": 37.28, + "confidence": 0.947 + }, + { + "text": "got", + "start": 37.28, + "end": 37.32, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 37.32, + "end": 37.36, + "confidence": 0.95 + }, + { + "text": "got", + "start": 37.36, + "end": 37.4, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 37.4, + "end": 37.44, + "confidence": 0.953 + }, + { + "text": "got", + "start": 37.44, + "end": 37.48, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 37.48, + "end": 37.52, + "confidence": 0.956 + }, + { + "text": "got", + "start": 37.52, + "end": 37.56, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 37.56, + "end": 37.6, + "confidence": 0.956 + }, + { + "text": "got", + "start": 37.6, + "end": 37.64, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.64, + "end": 37.68, + "confidence": 0.957 + }, + { + "text": "got", + "start": 37.68, + "end": 37.72, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 37.72, + "end": 37.76, + "confidence": 0.961 + }, + { + "text": "got", + "start": 37.76, + "end": 37.8, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 37.8, + "end": 37.84, + "confidence": 0.961 + }, + { + "text": "got", + "start": 37.84, + "end": 37.88, + "confidence": 0.955 + }, + { + 
"text": "them,", + "start": 37.88, + "end": 37.92, + "confidence": 0.962 + }, + { + "text": "got", + "start": 37.92, + "end": 37.96, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 37.96, + "end": 38.0, + "confidence": 0.964 + }, + { + "text": "got", + "start": 38.0, + "end": 38.04, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 38.04, + "end": 38.08, + "confidence": 0.965 + }, + { + "text": "got", + "start": 38.08, + "end": 38.12, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 38.12, + "end": 38.16, + "confidence": 0.965 + }, + { + "text": "got", + "start": 38.16, + "end": 38.2, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 38.2, + "end": 38.24, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.24, + "end": 38.28, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.28, + "end": 38.32, + "confidence": 0.969 + }, + { + "text": "got", + "start": 38.32, + "end": 38.36, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.36, + "end": 38.4, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.4, + "end": 38.44, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.44, + "end": 38.48, + "confidence": 0.971 + }, + { + "text": "got", + "start": 38.48, + "end": 38.52, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 38.52, + "end": 38.56, + "confidence": 0.972 + }, + { + "text": "got", + "start": 38.56, + "end": 38.6, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 38.6, + "end": 38.64, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.64, + "end": 38.68, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 38.68, + "end": 38.72, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.72, + "end": 38.76, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 38.76, + "end": 38.8, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.8, + "end": 38.84, + "confidence": 0.972 + }, + { + "text": "them,", 
+ "start": 38.84, + "end": 38.88, + "confidence": 0.976 + }, + { + "text": "got", + "start": 38.88, + "end": 38.92, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 38.92, + "end": 38.96, + "confidence": 0.978 + }, + { + "text": "got", + "start": 38.96, + "end": 39.0, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 39.0, + "end": 39.04, + "confidence": 0.978 + }, + { + "text": "got", + "start": 39.04, + "end": 39.08, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 39.08, + "end": 39.12, + "confidence": 0.979 + }, + { + "text": "got", + "start": 39.12, + "end": 39.16, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 39.16, + "end": 39.2, + "confidence": 0.979 + }, + { + "text": "got", + "start": 39.2, + "end": 39.24, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 39.24, + "end": 39.28, + "confidence": 0.981 + }, + { + "text": "got", + "start": 39.28, + "end": 39.32, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 39.32, + "end": 39.36, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.36, + "end": 39.4, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 39.4, + "end": 39.44, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.44, + "end": 39.48, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.48, + "end": 39.52, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.52, + "end": 39.56, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.56, + "end": 39.6, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.6, + "end": 39.64, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.64, + "end": 39.68, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.68, + "end": 39.72, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.72, + "end": 39.76, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.76, + "end": 39.8, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.8, 
+ "end": 39.84, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.84, + "end": 39.88, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.88, + "end": 39.92, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.92, + "end": 39.96, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.96, + "end": 40.0, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.0, + "end": 40.04, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 40.04, + "end": 40.08, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.08, + "end": 40.12, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.12, + "end": 40.16, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.16, + "end": 40.2, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.2, + "end": 40.24, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.24, + "end": 40.28, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.28, + "end": 40.32, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.32, + "end": 40.36, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.36, + "end": 40.4, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.4, + "end": 40.44, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.44, + "end": 40.48, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.48, + "end": 40.52, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.52, + "end": 40.56, + "confidence": 0.988 + }, + { + "text": "got", + "start": 40.56, + "end": 40.6, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.6, + "end": 40.64, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.64, + "end": 40.68, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.68, + "end": 40.86, + "confidence": 0.989 + }, + { + "text": "got", + "start": 40.86, + "end": 40.9, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.9, + "end": 40.94, + 
"confidence": 0.989 + }, + { + "text": "got", + "start": 40.94, + "end": 40.98, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.98, + "end": 41.24, + "confidence": 0.989 + }, + { + "text": "got", + "start": 41.24, + "end": 41.36, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.36, + "end": 42.3, + "confidence": 0.989 + }, + { + "text": "got", + "start": 42.3, + "end": 42.5, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 42.5, + "end": 42.98, + "confidence": 0.989 + }, + { + "text": "got", + "start": 42.98, + "end": 44.02, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 44.02, + "end": 44.42, + "confidence": 0.989 + }, + { + "text": "got", + "start": 44.42, + "end": 44.78, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 44.78, + "end": 45.16, + "confidence": 0.989 + }, + { + "text": "got", + "start": 45.16, + "end": 46.22, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.22, + "end": 46.52, + "confidence": 0.99 + }, + { + "text": "got", + "start": 46.52, + "end": 46.72, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.72, + "end": 47.72, + "confidence": 0.99 + }, + { + "text": "got", + "start": 47.72, + "end": 48.58, + "confidence": 0.993 + }, + { + "text": "them", + "start": 48.58, + "end": 54.68, + "confidence": 0.996 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 72.0, + "end": 84.66, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.05313390894321048, + "compression_ratio": 29.52, + "no_speech_prob": 0.24548843502998352, + "confidence": 0.84, + "words": [ + { + "text": "got", + "start": 72.0, + "end": 72.04, + "confidence": 0.005 + }, + { + "text": "them,", + "start": 72.04, + "end": 72.08, + "confidence": 0.007 
+ }, + { + "text": "got", + "start": 72.08, + "end": 72.12, + "confidence": 0.049 + }, + { + "text": "them,", + "start": 72.12, + "end": 72.16, + "confidence": 0.35 + }, + { + "text": "got", + "start": 72.16, + "end": 72.2, + "confidence": 0.65 + }, + { + "text": "them,", + "start": 72.2, + "end": 72.24, + "confidence": 0.549 + }, + { + "text": "got", + "start": 72.24, + "end": 72.28, + "confidence": 0.837 + }, + { + "text": "them,", + "start": 72.28, + "end": 72.32, + "confidence": 0.614 + }, + { + "text": "got", + "start": 72.32, + "end": 72.36, + "confidence": 0.891 + }, + { + "text": "them,", + "start": 72.36, + "end": 72.4, + "confidence": 0.665 + }, + { + "text": "got", + "start": 72.4, + "end": 72.44, + "confidence": 0.909 + }, + { + "text": "them,", + "start": 72.44, + "end": 72.48, + "confidence": 0.689 + }, + { + "text": "got", + "start": 72.48, + "end": 72.52, + "confidence": 0.861 + }, + { + "text": "them,", + "start": 72.52, + "end": 72.56, + "confidence": 0.772 + }, + { + "text": "got", + "start": 72.56, + "end": 72.6, + "confidence": 0.91 + }, + { + "text": "them,", + "start": 72.6, + "end": 72.64, + "confidence": 0.84 + }, + { + "text": "got", + "start": 72.64, + "end": 72.68, + "confidence": 0.933 + }, + { + "text": "them,", + "start": 72.68, + "end": 72.72, + "confidence": 0.633 + }, + { + "text": "got", + "start": 72.72, + "end": 72.76, + "confidence": 0.173 + }, + { + "text": "them,", + "start": 72.76, + "end": 72.8, + "confidence": 0.776 + }, + { + "text": "got", + "start": 72.8, + "end": 72.84, + "confidence": 0.824 + }, + { + "text": "them,", + "start": 72.84, + "end": 72.88, + "confidence": 0.832 + }, + { + "text": "got", + "start": 72.88, + "end": 72.92, + "confidence": 0.846 + }, + { + "text": "them,", + "start": 72.92, + "end": 72.96, + "confidence": 0.849 + }, + { + "text": "got", + "start": 72.96, + "end": 73.0, + "confidence": 0.846 + }, + { + "text": "them,", + "start": 73.0, + "end": 73.04, + "confidence": 0.867 + }, + { + "text": 
"got", + "start": 73.04, + "end": 73.08, + "confidence": 0.87 + }, + { + "text": "them,", + "start": 73.08, + "end": 73.12, + "confidence": 0.893 + }, + { + "text": "got", + "start": 73.12, + "end": 73.16, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 73.16, + "end": 73.2, + "confidence": 0.905 + }, + { + "text": "got", + "start": 73.2, + "end": 73.24, + "confidence": 0.898 + }, + { + "text": "them,", + "start": 73.24, + "end": 73.28, + "confidence": 0.906 + }, + { + "text": "got", + "start": 73.28, + "end": 73.32, + "confidence": 0.876 + }, + { + "text": "them,", + "start": 73.32, + "end": 73.36, + "confidence": 0.916 + }, + { + "text": "got", + "start": 73.36, + "end": 73.4, + "confidence": 0.885 + }, + { + "text": "them,", + "start": 73.4, + "end": 73.44, + "confidence": 0.923 + }, + { + "text": "got", + "start": 73.44, + "end": 73.48, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 73.48, + "end": 73.52, + "confidence": 0.931 + }, + { + "text": "got", + "start": 73.52, + "end": 73.56, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 73.56, + "end": 73.6, + "confidence": 0.935 + }, + { + "text": "got", + "start": 73.6, + "end": 73.64, + "confidence": 0.908 + }, + { + "text": "them,", + "start": 73.64, + "end": 73.68, + "confidence": 0.941 + }, + { + "text": "got", + "start": 73.68, + "end": 73.72, + "confidence": 0.912 + }, + { + "text": "them,", + "start": 73.72, + "end": 73.76, + "confidence": 0.944 + }, + { + "text": "got", + "start": 73.76, + "end": 73.8, + "confidence": 0.918 + }, + { + "text": "them,", + "start": 73.8, + "end": 73.84, + "confidence": 0.948 + }, + { + "text": "got", + "start": 73.84, + "end": 73.88, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 73.88, + "end": 73.92, + "confidence": 0.95 + }, + { + "text": "got", + "start": 73.92, + "end": 73.96, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 73.96, + "end": 74.0, + "confidence": 0.953 + }, + { + "text": "got", + "start": 
74.0, + "end": 74.04, + "confidence": 0.927 + }, + { + "text": "them,", + "start": 74.04, + "end": 74.08, + "confidence": 0.956 + }, + { + "text": "got", + "start": 74.08, + "end": 74.12, + "confidence": 0.927 + }, + { + "text": "them,", + "start": 74.12, + "end": 74.16, + "confidence": 0.956 + }, + { + "text": "got", + "start": 74.16, + "end": 74.2, + "confidence": 0.928 + }, + { + "text": "them,", + "start": 74.2, + "end": 74.24, + "confidence": 0.957 + }, + { + "text": "got", + "start": 74.24, + "end": 74.28, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 74.28, + "end": 74.32, + "confidence": 0.959 + }, + { + "text": "got", + "start": 74.32, + "end": 74.36, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 74.36, + "end": 74.4, + "confidence": 0.96 + }, + { + "text": "got", + "start": 74.4, + "end": 74.44, + "confidence": 0.931 + }, + { + "text": "them,", + "start": 74.44, + "end": 74.48, + "confidence": 0.961 + }, + { + "text": "got", + "start": 74.48, + "end": 74.52, + "confidence": 0.933 + }, + { + "text": "them,", + "start": 74.52, + "end": 74.56, + "confidence": 0.962 + }, + { + "text": "got", + "start": 74.56, + "end": 74.6, + "confidence": 0.933 + }, + { + "text": "them,", + "start": 74.6, + "end": 74.64, + "confidence": 0.963 + }, + { + "text": "got", + "start": 74.64, + "end": 74.68, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 74.68, + "end": 74.72, + "confidence": 0.963 + }, + { + "text": "got", + "start": 74.72, + "end": 74.76, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 74.76, + "end": 74.8, + "confidence": 0.964 + }, + { + "text": "got", + "start": 74.8, + "end": 74.84, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 74.84, + "end": 74.88, + "confidence": 0.964 + }, + { + "text": "got", + "start": 74.88, + "end": 74.92, + "confidence": 0.939 + }, + { + "text": "them,", + "start": 74.92, + "end": 74.96, + "confidence": 0.965 + }, + { + "text": "got", + "start": 74.96, + "end": 
75.0, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 75.0, + "end": 75.04, + "confidence": 0.966 + }, + { + "text": "got", + "start": 75.04, + "end": 75.08, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 75.08, + "end": 75.12, + "confidence": 0.967 + }, + { + "text": "got", + "start": 75.12, + "end": 75.16, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 75.16, + "end": 75.2, + "confidence": 0.968 + }, + { + "text": "got", + "start": 75.2, + "end": 75.24, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 75.24, + "end": 75.28, + "confidence": 0.968 + }, + { + "text": "got", + "start": 75.28, + "end": 75.32, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 75.32, + "end": 75.36, + "confidence": 0.969 + }, + { + "text": "got", + "start": 75.36, + "end": 75.4, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 75.4, + "end": 75.44, + "confidence": 0.969 + }, + { + "text": "got", + "start": 75.44, + "end": 75.48, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 75.48, + "end": 75.52, + "confidence": 0.97 + }, + { + "text": "got", + "start": 75.52, + "end": 75.56, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 75.56, + "end": 75.6, + "confidence": 0.971 + }, + { + "text": "got", + "start": 75.6, + "end": 75.64, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 75.64, + "end": 75.68, + "confidence": 0.973 + }, + { + "text": "got", + "start": 75.68, + "end": 75.72, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 75.72, + "end": 75.76, + "confidence": 0.973 + }, + { + "text": "got", + "start": 75.76, + "end": 75.8, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 75.8, + "end": 75.84, + "confidence": 0.973 + }, + { + "text": "got", + "start": 75.84, + "end": 75.88, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.88, + "end": 75.92, + "confidence": 0.973 + }, + { + "text": "got", + "start": 75.92, + "end": 75.96, + 
"confidence": 0.969 + }, + { + "text": "them,", + "start": 75.96, + "end": 76.0, + "confidence": 0.975 + }, + { + "text": "got", + "start": 76.0, + "end": 76.04, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 76.04, + "end": 76.08, + "confidence": 0.975 + }, + { + "text": "got", + "start": 76.08, + "end": 76.12, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 76.12, + "end": 76.16, + "confidence": 0.976 + }, + { + "text": "got", + "start": 76.16, + "end": 76.2, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.2, + "end": 76.24, + "confidence": 0.976 + }, + { + "text": "got", + "start": 76.24, + "end": 76.28, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.28, + "end": 76.32, + "confidence": 0.975 + }, + { + "text": "got", + "start": 76.32, + "end": 76.36, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 76.36, + "end": 76.4, + "confidence": 0.977 + }, + { + "text": "got", + "start": 76.4, + "end": 76.44, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 76.44, + "end": 76.48, + "confidence": 0.977 + }, + { + "text": "got", + "start": 76.48, + "end": 76.52, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 76.52, + "end": 76.56, + "confidence": 0.978 + }, + { + "text": "got", + "start": 76.56, + "end": 76.6, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 76.6, + "end": 76.64, + "confidence": 0.978 + }, + { + "text": "got", + "start": 76.64, + "end": 76.68, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 76.68, + "end": 76.72, + "confidence": 0.979 + }, + { + "text": "got", + "start": 76.72, + "end": 76.76, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 76.76, + "end": 76.8, + "confidence": 0.978 + }, + { + "text": "got", + "start": 76.8, + "end": 76.84, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 76.84, + "end": 76.88, + "confidence": 0.98 + }, + { + "text": "got", + "start": 76.88, + "end": 76.92, + "confidence": 0.984 
+ }, + { + "text": "them,", + "start": 76.92, + "end": 76.96, + "confidence": 0.98 + }, + { + "text": "got", + "start": 76.96, + "end": 77.0, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 77.0, + "end": 77.04, + "confidence": 0.98 + }, + { + "text": "got", + "start": 77.04, + "end": 77.08, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 77.08, + "end": 77.12, + "confidence": 0.98 + }, + { + "text": "got", + "start": 77.12, + "end": 77.16, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 77.16, + "end": 77.2, + "confidence": 0.98 + }, + { + "text": "got", + "start": 77.2, + "end": 77.24, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 77.24, + "end": 77.28, + "confidence": 0.981 + }, + { + "text": "got", + "start": 77.28, + "end": 77.32, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 77.32, + "end": 77.36, + "confidence": 0.982 + }, + { + "text": "got", + "start": 77.36, + "end": 77.4, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 77.4, + "end": 77.44, + "confidence": 0.983 + }, + { + "text": "got", + "start": 77.44, + "end": 77.48, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 77.48, + "end": 77.52, + "confidence": 0.982 + }, + { + "text": "got", + "start": 77.52, + "end": 77.56, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 77.56, + "end": 77.6, + "confidence": 0.984 + }, + { + "text": "got", + "start": 77.6, + "end": 77.64, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 77.64, + "end": 78.74, + "confidence": 0.983 + }, + { + "text": "got", + "start": 78.74, + "end": 78.8, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 78.8, + "end": 79.8, + "confidence": 0.984 + }, + { + "text": "got", + "start": 79.8, + "end": 80.08, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 80.08, + "end": 83.98, + "confidence": 0.984 + }, + { + "text": "got", + "start": 83.98, + "end": 84.16, + "confidence": 0.991 + }, + { + "text": 
"them", + "start": 84.16, + "end": 84.66, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 84.68, + "end": 94.48, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.04979915789959142, + "compression_ratio": 29.52, + "no_speech_prob": 0.6971050500869751, + "confidence": 0.848, + "words": [ + { + "text": "got", + "start": 84.68, + "end": 85.42, + "confidence": 0.0 + }, + { + "text": "them,", + "start": 85.42, + "end": 85.92, + "confidence": 0.023 + }, + { + "text": "got", + "start": 85.92, + "end": 86.74, + "confidence": 0.821 + }, + { + "text": "them,", + "start": 86.74, + "end": 87.2, + "confidence": 0.604 + }, + { + "text": "got", + "start": 87.2, + "end": 88.62, + "confidence": 0.84 + }, + { + "text": "them,", + "start": 88.62, + "end": 88.66, + "confidence": 0.451 + }, + { + "text": "got", + "start": 88.66, + "end": 88.84, + "confidence": 0.887 + }, + { + "text": "them,", + "start": 88.84, + "end": 88.88, + "confidence": 0.382 + }, + { + "text": "got", + "start": 88.88, + "end": 88.92, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 88.92, + "end": 88.96, + "confidence": 0.353 + }, + { + "text": "got", + "start": 88.96, + "end": 89.0, + "confidence": 0.896 + }, + { + "text": "them,", + "start": 89.0, + "end": 89.04, + "confidence": 0.352 + }, + { + "text": "got", + "start": 89.04, + "end": 89.08, + "confidence": 0.869 + }, + { + "text": "them,", + "start": 89.08, + "end": 89.12, + "confidence": 0.423 + }, + { + "text": "got", + "start": 89.12, + "end": 89.16, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 89.16, + "end": 89.2, + "confidence": 0.528 + }, + { + "text": "got", + "start": 89.2, + "end": 89.24, + "confidence": 0.91 + }, + { + "text": "them,", + "start": 
89.24, + "end": 89.28, + "confidence": 0.619 + }, + { + "text": "got", + "start": 89.28, + "end": 89.32, + "confidence": 0.65 + }, + { + "text": "them,", + "start": 89.32, + "end": 89.36, + "confidence": 0.71 + }, + { + "text": "got", + "start": 89.36, + "end": 89.4, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 89.4, + "end": 89.44, + "confidence": 0.815 + }, + { + "text": "got", + "start": 89.44, + "end": 89.48, + "confidence": 0.908 + }, + { + "text": "them,", + "start": 89.48, + "end": 89.52, + "confidence": 0.874 + }, + { + "text": "got", + "start": 89.52, + "end": 89.56, + "confidence": 0.925 + }, + { + "text": "them,", + "start": 89.56, + "end": 89.6, + "confidence": 0.907 + }, + { + "text": "got", + "start": 89.6, + "end": 89.64, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 89.64, + "end": 89.68, + "confidence": 0.927 + }, + { + "text": "got", + "start": 89.68, + "end": 89.72, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 89.72, + "end": 89.76, + "confidence": 0.941 + }, + { + "text": "got", + "start": 89.76, + "end": 89.8, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 89.8, + "end": 89.84, + "confidence": 0.942 + }, + { + "text": "got", + "start": 89.84, + "end": 89.88, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 89.88, + "end": 89.92, + "confidence": 0.943 + }, + { + "text": "got", + "start": 89.92, + "end": 89.96, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 89.96, + "end": 90.0, + "confidence": 0.945 + }, + { + "text": "got", + "start": 90.0, + "end": 90.04, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 90.04, + "end": 90.08, + "confidence": 0.952 + }, + { + "text": "got", + "start": 90.08, + "end": 90.12, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 90.12, + "end": 90.16, + "confidence": 0.955 + }, + { + "text": "got", + "start": 90.16, + "end": 90.2, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 90.2, + "end": 
90.24, + "confidence": 0.961 + }, + { + "text": "got", + "start": 90.24, + "end": 90.28, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 90.28, + "end": 90.32, + "confidence": 0.964 + }, + { + "text": "got", + "start": 90.32, + "end": 90.36, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 90.36, + "end": 90.4, + "confidence": 0.967 + }, + { + "text": "got", + "start": 90.4, + "end": 90.44, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 90.44, + "end": 90.48, + "confidence": 0.97 + }, + { + "text": "got", + "start": 90.48, + "end": 90.52, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 90.52, + "end": 90.56, + "confidence": 0.973 + }, + { + "text": "got", + "start": 90.56, + "end": 90.6, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 90.6, + "end": 90.64, + "confidence": 0.977 + }, + { + "text": "got", + "start": 90.64, + "end": 90.68, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 90.68, + "end": 90.72, + "confidence": 0.977 + }, + { + "text": "got", + "start": 90.72, + "end": 90.76, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 90.76, + "end": 90.8, + "confidence": 0.978 + }, + { + "text": "got", + "start": 90.8, + "end": 90.84, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 90.84, + "end": 90.88, + "confidence": 0.979 + }, + { + "text": "got", + "start": 90.88, + "end": 90.92, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 90.92, + "end": 90.96, + "confidence": 0.981 + }, + { + "text": "got", + "start": 90.96, + "end": 91.0, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 91.0, + "end": 91.04, + "confidence": 0.981 + }, + { + "text": "got", + "start": 91.04, + "end": 91.08, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 91.08, + "end": 91.12, + "confidence": 0.982 + }, + { + "text": "got", + "start": 91.12, + "end": 91.16, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 91.16, + "end": 91.2, + 
"confidence": 0.983 + }, + { + "text": "got", + "start": 91.2, + "end": 91.24, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 91.24, + "end": 91.28, + "confidence": 0.983 + }, + { + "text": "got", + "start": 91.28, + "end": 91.32, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 91.32, + "end": 91.36, + "confidence": 0.984 + }, + { + "text": "got", + "start": 91.36, + "end": 91.4, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 91.4, + "end": 91.44, + "confidence": 0.983 + }, + { + "text": "got", + "start": 91.44, + "end": 91.48, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 91.48, + "end": 91.52, + "confidence": 0.985 + }, + { + "text": "got", + "start": 91.52, + "end": 91.56, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 91.56, + "end": 91.6, + "confidence": 0.985 + }, + { + "text": "got", + "start": 91.6, + "end": 91.64, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 91.64, + "end": 91.68, + "confidence": 0.986 + }, + { + "text": "got", + "start": 91.68, + "end": 91.72, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 91.72, + "end": 91.76, + "confidence": 0.986 + }, + { + "text": "got", + "start": 91.76, + "end": 91.8, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 91.8, + "end": 91.84, + "confidence": 0.987 + }, + { + "text": "got", + "start": 91.84, + "end": 91.88, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 91.88, + "end": 91.92, + "confidence": 0.987 + }, + { + "text": "got", + "start": 91.92, + "end": 91.96, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 91.96, + "end": 92.0, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.0, + "end": 92.04, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 92.04, + "end": 92.08, + "confidence": 0.988 + }, + { + "text": "got", + "start": 92.08, + "end": 92.12, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 92.12, + "end": 92.16, + "confidence": 
0.988 + }, + { + "text": "got", + "start": 92.16, + "end": 92.2, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 92.2, + "end": 92.24, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.24, + "end": 92.28, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 92.28, + "end": 92.32, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.32, + "end": 92.36, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 92.36, + "end": 92.4, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.4, + "end": 92.44, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 92.44, + "end": 92.48, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.48, + "end": 92.52, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 92.52, + "end": 92.56, + "confidence": 0.99 + }, + { + "text": "got", + "start": 92.56, + "end": 92.6, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 92.6, + "end": 92.64, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.64, + "end": 92.68, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 92.68, + "end": 92.72, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.72, + "end": 92.76, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 92.76, + "end": 92.8, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.8, + "end": 92.84, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 92.84, + "end": 92.88, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.88, + "end": 92.92, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 92.92, + "end": 92.96, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.96, + "end": 93.0, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 93.0, + "end": 93.04, + "confidence": 0.992 + }, + { + "text": "got", + "start": 93.04, + "end": 93.08, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 93.08, + "end": 93.12, + "confidence": 0.992 + }, + { + 
"text": "got", + "start": 93.12, + "end": 93.16, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 93.16, + "end": 93.2, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.2, + "end": 93.24, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.24, + "end": 93.28, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.28, + "end": 93.32, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.32, + "end": 93.36, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.36, + "end": 93.4, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.4, + "end": 93.44, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.44, + "end": 93.48, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.48, + "end": 93.52, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.52, + "end": 93.56, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.56, + "end": 93.6, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.6, + "end": 93.64, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.64, + "end": 93.68, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.68, + "end": 93.72, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.72, + "end": 93.76, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.76, + "end": 93.8, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.8, + "end": 93.84, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.84, + "end": 93.88, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.88, + "end": 93.92, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.92, + "end": 93.96, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.96, + "end": 94.0, + "confidence": 0.994 + }, + { + "text": "got", + "start": 94.0, + "end": 94.04, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 94.04, + "end": 94.08, + "confidence": 0.994 + }, + { + "text": "got", + 
"start": 94.08, + "end": 94.12, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 94.12, + "end": 94.16, + "confidence": 0.995 + }, + { + "text": "got", + "start": 94.16, + "end": 94.2, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 94.2, + "end": 94.24, + "confidence": 0.994 + }, + { + "text": "got", + "start": 94.24, + "end": 94.28, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 94.28, + "end": 94.32, + "confidence": 0.994 + }, + { + "text": "got", + "start": 94.32, + "end": 94.36, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 94.36, + "end": 94.4, + "confidence": 0.994 + }, + { + "text": "got", + "start": 94.4, + "end": 94.44, + "confidence": 0.995 + }, + { + "text": "them", + "start": 94.44, + "end": 94.48, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive/accurate_apollo11.mp3.words.json b/tests/expected/naive/accurate_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..036a3d0ddb220ae34d81cabd816757325a7672bb --- /dev/null +++ b/tests/expected/naive/accurate_apollo11.mp3.words.json @@ -0,0 +1,2756 @@ +{ + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A. All right. Okay. Yeah, sir. Yeah, sir. Let's take that camera. Let's say it makes it want to go on the helmet we were going to have in B-1. The other one. And you can put the other one on the mic helmet. We'll show it to you in a quick screen. Over. All right. Yeah, sir. All right. Yeah, sir. There's a better helmet than B-1. That's the other one. Nice. Let's go in there. At least we're safe. We've got them in there. Helmet bags. And, uh, I guess we have helmets in the helmet bag. At least it's in the helmet bag. Right here. Right here. Yeah, we're taking it next day. Yeah, we're taking it next day. Yeah, it's a little bit of a delay on it. Yeah, you know what. We're gonna hack me on it. 
We're gonna hack you on it. We're gonna hack you on it. We're gonna hack you on it. With a cover, I tried it already. Okay, fine. We weren't sure of that, uh, just the suggestion. We thought we'd, uh, say you could check it out. It's not much of a hard to turn on, so, uh, I guess, uh, we're gonna come up with it. Let us know. Okay, no problem. Okay, no problem. We're gonna let you know when the end of... None. I'm glad you came. Everything's OK. Everything's OK. Yeah. You're safe.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.54, + "end": 6.66, + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 11, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 44, + 12, + 36, + 12, + 38, + 12, + 36, + 12, + 32, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.578, + "words": [ + { + "text": "Apollo", + "start": 0.54, + "end": 0.92, + "confidence": 0.425 + }, + { + "text": "11,", + "start": 0.92, + "end": 1.26, + "confidence": 0.842 + }, + { + "text": "Houston,", + "start": 1.52, + "end": 1.72, + "confidence": 0.665 + }, + { + "text": "we", + "start": 1.9, + "end": 1.94, + "confidence": 0.983 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.743 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.993 + }, + { + "text": "recommendation", + "start": 2.26, + "end": 2.86, + "confidence": 0.977 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.94 + }, + { + "text": "you", + "start": 3.44, + "end": 3.6, + "confidence": 0.993 + }, + { + "text": "on", + "start": 3.6, + "end": 3.74, + "confidence": 0.918 + }, + { + "text": "your", + "start": 3.74, + "end": 3.94, + "confidence": 0.976 + }, + { + "text": "Soyuz-EA", 
+ "start": 3.94, + "end": 5.26, + "confidence": 0.455 + }, + { + "text": "GLEM-E-G-E-A.", + "start": 5.26, + "end": 6.66, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 11.08, + "text": " All right.", + "tokens": [ + 50714, + 1057, + 558, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.14, + "words": [ + { + "text": "All", + "start": 10.8, + "end": 10.82, + "confidence": 0.005 + }, + { + "text": "right.", + "start": 10.82, + "end": 11.08, + "confidence": 0.747 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.2, + "end": 12.22, + "text": " Okay.", + "tokens": [ + 50914, + 1033, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.287, + "words": [ + { + "text": "Okay.", + "start": 12.2, + "end": 12.22, + "confidence": 0.287 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 12.22, + "end": 13.38, + "text": " Yeah, sir.", + "tokens": [ + 50964, + 865, + 11, + 4735, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.124, + "words": [ + { + "text": "Yeah,", + "start": 12.22, + "end": 12.64, + "confidence": 0.113 + }, + { + "text": "sir.", + "start": 12.76, + "end": 13.38, + "confidence": 0.137 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.38, + "end": 13.92, + "text": " Yeah, sir.", + "tokens": [ + 51014, + 865, + 11, + 4735, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.048, + "words": [ + { + "text": "Yeah,", + "start": 13.38, + "end": 13.5, + "confidence": 0.089 + }, + { + "text": "sir.", + 
"start": 13.64, + "end": 13.92, + "confidence": 0.026 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 13.92, + "end": 14.52, + "text": " Let's take that camera.", + "tokens": [ + 51064, + 961, + 311, + 747, + 300, + 2799, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.054, + "words": [ + { + "text": "Let's", + "start": 13.92, + "end": 14.04, + "confidence": 0.035 + }, + { + "text": "take", + "start": 14.04, + "end": 14.14, + "confidence": 0.027 + }, + { + "text": "that", + "start": 14.14, + "end": 14.22, + "confidence": 0.037 + }, + { + "text": "camera.", + "start": 14.22, + "end": 14.52, + "confidence": 0.14 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 14.52, + "end": 19.08, + "text": " Let's say it makes it want to go on the helmet we were going to have in B-1.", + "tokens": [ + 51114, + 961, + 311, + 584, + 309, + 1669, + 309, + 528, + 281, + 352, + 322, + 264, + 15922, + 321, + 645, + 516, + 281, + 362, + 294, + 363, + 12, + 16, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.29, + "words": [ + { + "text": "Let's", + "start": 14.52, + "end": 14.82, + "confidence": 0.124 + }, + { + "text": "say", + "start": 14.82, + "end": 14.98, + "confidence": 0.29 + }, + { + "text": "it", + "start": 14.98, + "end": 15.04, + "confidence": 0.029 + }, + { + "text": "makes", + "start": 15.04, + "end": 15.64, + "confidence": 0.633 + }, + { + "text": "it", + "start": 15.64, + "end": 15.86, + "confidence": 0.948 + }, + { + "text": "want", + "start": 15.86, + "end": 16.08, + "confidence": 0.249 + }, + { + "text": "to", + "start": 16.08, + "end": 16.18, + "confidence": 0.969 + }, + { + "text": "go", + "start": 16.18, + "end": 16.38, + "confidence": 0.816 + }, + { + "text": "on", + "start": 16.38, + "end": 16.6, 
+ "confidence": 0.934 + }, + { + "text": "the", + "start": 16.6, + "end": 16.76, + "confidence": 0.45 + }, + { + "text": "helmet", + "start": 16.76, + "end": 17.24, + "confidence": 0.003 + }, + { + "text": "we", + "start": 17.24, + "end": 17.54, + "confidence": 0.019 + }, + { + "text": "were", + "start": 17.54, + "end": 17.78, + "confidence": 0.181 + }, + { + "text": "going", + "start": 17.78, + "end": 17.96, + "confidence": 0.637 + }, + { + "text": "to", + "start": 17.96, + "end": 18.12, + "confidence": 0.965 + }, + { + "text": "have", + "start": 18.12, + "end": 18.18, + "confidence": 0.967 + }, + { + "text": "in", + "start": 18.18, + "end": 18.36, + "confidence": 0.902 + }, + { + "text": "B-1.", + "start": 18.36, + "end": 19.08, + "confidence": 0.471 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.08, + "end": 19.84, + "text": " The other one.", + "tokens": [ + 51314, + 440, + 661, + 472, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.126, + "words": [ + { + "text": "The", + "start": 19.08, + "end": 19.38, + "confidence": 0.004 + }, + { + "text": "other", + "start": 19.38, + "end": 19.62, + "confidence": 0.141 + }, + { + "text": "one.", + "start": 19.62, + "end": 19.84, + "confidence": 0.691 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 20.04, + "end": 22.8, + "text": " And you can put the other one on the mic helmet.", + "tokens": [ + 51364, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 13, + 51514 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.405, + "words": [ + { + "text": "And", + "start": 20.04, + "end": 20.22, + "confidence": 0.344 + }, + { + "text": "you", + "start": 20.22, + "end": 20.32, + "confidence": 0.892 + }, + { + "text": "can", + "start": 20.32, + 
"end": 20.48, + "confidence": 0.126 + }, + { + "text": "put", + "start": 20.48, + "end": 20.64, + "confidence": 0.894 + }, + { + "text": "the", + "start": 20.64, + "end": 20.84, + "confidence": 0.979 + }, + { + "text": "other", + "start": 20.84, + "end": 21.0, + "confidence": 0.996 + }, + { + "text": "one", + "start": 21.0, + "end": 21.18, + "confidence": 0.972 + }, + { + "text": "on", + "start": 21.18, + "end": 21.38, + "confidence": 0.969 + }, + { + "text": "the", + "start": 21.38, + "end": 21.66, + "confidence": 0.09 + }, + { + "text": "mic", + "start": 21.66, + "end": 22.48, + "confidence": 0.616 + }, + { + "text": "helmet.", + "start": 22.48, + "end": 22.8, + "confidence": 0.105 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 22.8, + "end": 24.36, + "text": " We'll show it to you in a quick screen.", + "tokens": [ + 51514, + 492, + 603, + 855, + 309, + 281, + 291, + 294, + 257, + 1702, + 2568, + 13, + 51614 + ], + "temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.121, + "words": [ + { + "text": "We'll", + "start": 22.8, + "end": 23.0, + "confidence": 0.08 + }, + { + "text": "show", + "start": 23.0, + "end": 23.24, + "confidence": 0.013 + }, + { + "text": "it", + "start": 23.24, + "end": 23.36, + "confidence": 0.12 + }, + { + "text": "to", + "start": 23.36, + "end": 23.52, + "confidence": 0.356 + }, + { + "text": "you", + "start": 23.52, + "end": 23.64, + "confidence": 0.609 + }, + { + "text": "in", + "start": 23.64, + "end": 23.86, + "confidence": 0.287 + }, + { + "text": "a", + "start": 23.86, + "end": 23.88, + "confidence": 0.723 + }, + { + "text": "quick", + "start": 23.88, + "end": 24.04, + "confidence": 0.016 + }, + { + "text": "screen.", + "start": 24.04, + "end": 24.36, + "confidence": 0.109 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 24.84, + "end": 25.1, + "text": " Over.", + "tokens": [ + 51614, + 4886, + 13, + 51664 + ], + 
"temperature": 0.0, + "avg_logprob": -0.5322377604822959, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.44994914531707764, + "confidence": 0.062, + "words": [ + { + "text": "Over.", + "start": 24.84, + "end": 25.1, + "confidence": 0.062 + } + ] + }, + { + "id": 11, + "seek": 2600, + "start": 31.32, + "end": 31.36, + "text": " All right.", + "tokens": [ + 50364, + 1057, + 558, + 13, + 50614 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.108, + "words": [ + { + "text": "All", + "start": 31.32, + "end": 31.34, + "confidence": 0.003 + }, + { + "text": "right.", + "start": 31.34, + "end": 31.36, + "confidence": 0.677 + } + ] + }, + { + "id": 12, + "seek": 2600, + "start": 31.36, + "end": 32.06, + "text": " Yeah, sir.", + "tokens": [ + 50614, + 865, + 11, + 4735, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.035, + "words": [ + { + "text": "Yeah,", + "start": 31.36, + "end": 31.56, + "confidence": 0.033 + }, + { + "text": "sir.", + "start": 31.56, + "end": 32.06, + "confidence": 0.036 + } + ] + }, + { + "id": 13, + "seek": 2600, + "start": 32.26, + "end": 33.0, + "text": " All right.", + "tokens": [ + 50664, + 1057, + 558, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.201, + "words": [ + { + "text": "All", + "start": 32.26, + "end": 32.86, + "confidence": 0.022 + }, + { + "text": "right.", + "start": 32.86, + "end": 33.0, + "confidence": 0.607 + } + ] + }, + { + "id": 14, + "seek": 2600, + "start": 33.0, + "end": 33.5, + "text": " Yeah, sir.", + "tokens": [ + 50714, + 865, + 11, + 4735, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + 
"compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.044, + "words": [ + { + "text": "Yeah,", + "start": 33.0, + "end": 33.16, + "confidence": 0.208 + }, + { + "text": "sir.", + "start": 33.22, + "end": 33.5, + "confidence": 0.009 + } + ] + }, + { + "id": 15, + "seek": 2600, + "start": 33.5, + "end": 34.98, + "text": " There's a better helmet than B-1.", + "tokens": [ + 50764, + 821, + 311, + 257, + 1101, + 15922, + 813, + 363, + 12, + 16, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.04, + "words": [ + { + "text": "There's", + "start": 33.5, + "end": 33.9, + "confidence": 0.022 + }, + { + "text": "a", + "start": 33.9, + "end": 33.92, + "confidence": 0.264 + }, + { + "text": "better", + "start": 33.92, + "end": 33.94, + "confidence": 0.228 + }, + { + "text": "helmet", + "start": 33.94, + "end": 34.2, + "confidence": 0.004 + }, + { + "text": "than", + "start": 34.2, + "end": 34.6, + "confidence": 0.352 + }, + { + "text": "B-1.", + "start": 34.6, + "end": 34.98, + "confidence": 0.022 + } + ] + }, + { + "id": 16, + "seek": 2600, + "start": 35.0, + "end": 35.8, + "text": " That's the other one.", + "tokens": [ + 50814, + 663, + 311, + 264, + 661, + 472, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.277, + "words": [ + { + "text": "That's", + "start": 35.0, + "end": 35.38, + "confidence": 0.161 + }, + { + "text": "the", + "start": 35.38, + "end": 35.48, + "confidence": 0.515 + }, + { + "text": "other", + "start": 35.48, + "end": 35.6, + "confidence": 0.965 + }, + { + "text": "one.", + "start": 35.6, + "end": 35.8, + "confidence": 0.188 + } + ] + }, + { + "id": 17, + "seek": 2600, + "start": 35.8, + "end": 36.04, + "text": " Nice.", + "tokens": [ + 50864, + 
5490, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.011, + "words": [ + { + "text": "Nice.", + "start": 35.8, + "end": 36.04, + "confidence": 0.011 + } + ] + }, + { + "id": 18, + "seek": 2600, + "start": 37.62, + "end": 38.38, + "text": " Let's go in there.", + "tokens": [ + 50914, + 961, + 311, + 352, + 294, + 456, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.125, + "words": [ + { + "text": "Let's", + "start": 37.62, + "end": 38.1, + "confidence": 0.022 + }, + { + "text": "go", + "start": 38.1, + "end": 38.12, + "confidence": 0.626 + }, + { + "text": "in", + "start": 38.12, + "end": 38.26, + "confidence": 0.037 + }, + { + "text": "there.", + "start": 38.26, + "end": 38.38, + "confidence": 0.579 + } + ] + }, + { + "id": 19, + "seek": 2600, + "start": 38.38, + "end": 39.2, + "text": " At least we're safe.", + "tokens": [ + 50964, + 1711, + 1935, + 321, + 434, + 3273, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.107, + "words": [ + { + "text": "At", + "start": 38.38, + "end": 38.44, + "confidence": 0.003 + }, + { + "text": "least", + "start": 38.44, + "end": 38.72, + "confidence": 0.125 + }, + { + "text": "we're", + "start": 38.72, + "end": 38.98, + "confidence": 0.091 + }, + { + "text": "safe.", + "start": 38.98, + "end": 39.2, + "confidence": 0.715 + } + ] + }, + { + "id": 20, + "seek": 2600, + "start": 39.88, + "end": 40.54, + "text": " We've got them in there.", + "tokens": [ + 51014, + 492, + 600, + 658, + 552, + 294, + 456, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + 
"no_speech_prob": 0.08131657540798187, + "confidence": 0.077, + "words": [ + { + "text": "We've", + "start": 39.88, + "end": 40.36, + "confidence": 0.055 + }, + { + "text": "got", + "start": 40.36, + "end": 40.48, + "confidence": 0.749 + }, + { + "text": "them", + "start": 40.48, + "end": 40.5, + "confidence": 0.003 + }, + { + "text": "in", + "start": 40.5, + "end": 40.52, + "confidence": 0.053 + }, + { + "text": "there.", + "start": 40.52, + "end": 40.54, + "confidence": 0.211 + } + ] + }, + { + "id": 21, + "seek": 2600, + "start": 40.54, + "end": 41.04, + "text": " Helmet bags.", + "tokens": [ + 51064, + 6128, + 5537, + 10405, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.001, + "words": [ + { + "text": "Helmet", + "start": 40.54, + "end": 40.66, + "confidence": 0.001 + }, + { + "text": "bags.", + "start": 40.66, + "end": 41.04, + "confidence": 0.002 + } + ] + }, + { + "id": 22, + "seek": 2600, + "start": 43.18, + "end": 47.4, + "text": " And, uh, I guess we have helmets in the helmet bag.", + "tokens": [ + 51114, + 400, + 11, + 2232, + 11, + 286, + 2041, + 321, + 362, + 42022, + 294, + 264, + 15922, + 3411, + 13, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.175, + "words": [ + { + "text": "And,", + "start": 43.18, + "end": 43.2, + "confidence": 0.052 + }, + { + "text": "uh,", + "start": 43.48, + "end": 43.74, + "confidence": 0.745 + }, + { + "text": "I", + "start": 43.86, + "end": 44.04, + "confidence": 0.341 + }, + { + "text": "guess", + "start": 44.04, + "end": 44.2, + "confidence": 0.283 + }, + { + "text": "we", + "start": 44.2, + "end": 44.52, + "confidence": 0.055 + }, + { + "text": "have", + "start": 44.52, + "end": 45.48, + "confidence": 0.053 + }, + { + "text": "helmets", + "start": 45.48, + 
"end": 46.5, + "confidence": 0.005 + }, + { + "text": "in", + "start": 46.5, + "end": 46.72, + "confidence": 0.216 + }, + { + "text": "the", + "start": 46.72, + "end": 46.92, + "confidence": 0.619 + }, + { + "text": "helmet", + "start": 46.92, + "end": 47.08, + "confidence": 0.831 + }, + { + "text": "bag.", + "start": 47.08, + "end": 47.4, + "confidence": 0.329 + } + ] + }, + { + "id": 23, + "seek": 2600, + "start": 48.2, + "end": 49.52, + "text": " At least it's in the helmet bag.", + "tokens": [ + 51464, + 1711, + 1935, + 309, + 311, + 294, + 264, + 15922, + 3411, + 13, + 51564 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.122, + "words": [ + { + "text": "At", + "start": 48.2, + "end": 48.34, + "confidence": 0.096 + }, + { + "text": "least", + "start": 48.34, + "end": 48.6, + "confidence": 0.761 + }, + { + "text": "it's", + "start": 48.6, + "end": 48.86, + "confidence": 0.339 + }, + { + "text": "in", + "start": 48.86, + "end": 48.96, + "confidence": 0.151 + }, + { + "text": "the", + "start": 48.96, + "end": 49.1, + "confidence": 0.223 + }, + { + "text": "helmet", + "start": 49.1, + "end": 49.18, + "confidence": 0.006 + }, + { + "text": "bag.", + "start": 49.18, + "end": 49.52, + "confidence": 0.06 + } + ] + }, + { + "id": 24, + "seek": 2600, + "start": 49.52, + "end": 50.38, + "text": " Right here.", + "tokens": [ + 51564, + 1779, + 510, + 13, + 51614 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.059, + "words": [ + { + "text": "Right", + "start": 49.52, + "end": 49.62, + "confidence": 0.027 + }, + { + "text": "here.", + "start": 49.62, + "end": 50.38, + "confidence": 0.088 + } + ] + }, + { + "id": 25, + "seek": 2600, + "start": 51.52, + "end": 51.98, + "text": " Right here.", + "tokens": [ + 51614, + 1779, + 510, + 13, + 
51664 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.017, + "words": [ + { + "text": "Right", + "start": 51.52, + "end": 51.72, + "confidence": 0.001 + }, + { + "text": "here.", + "start": 51.72, + "end": 51.98, + "confidence": 0.067 + } + ] + }, + { + "id": 26, + "seek": 2600, + "start": 51.98, + "end": 52.74, + "text": " Yeah, we're taking it next day.", + "tokens": [ + 51664, + 865, + 11, + 321, + 434, + 1940, + 309, + 958, + 786, + 13, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.047, + "words": [ + { + "text": "Yeah,", + "start": 51.98, + "end": 52.28, + "confidence": 0.043 + }, + { + "text": "we're", + "start": 52.28, + "end": 52.3, + "confidence": 0.288 + }, + { + "text": "taking", + "start": 52.3, + "end": 52.32, + "confidence": 0.005 + }, + { + "text": "it", + "start": 52.32, + "end": 52.34, + "confidence": 0.052 + }, + { + "text": "next", + "start": 52.34, + "end": 52.52, + "confidence": 0.002 + }, + { + "text": "day.", + "start": 52.52, + "end": 52.74, + "confidence": 0.113 + } + ] + }, + { + "id": 27, + "seek": 2600, + "start": 52.74, + "end": 54.1, + "text": " Yeah, we're taking it next day.", + "tokens": [ + 51714, + 865, + 11, + 321, + 434, + 1940, + 309, + 958, + 786, + 13, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.5041401628134907, + "compression_ratio": 1.9243243243243244, + "no_speech_prob": 0.08131657540798187, + "confidence": 0.41, + "words": [ + { + "text": "Yeah,", + "start": 52.74, + "end": 53.24, + "confidence": 0.601 + }, + { + "text": "we're", + "start": 53.34, + "end": 53.44, + "confidence": 0.703 + }, + { + "text": "taking", + "start": 53.44, + "end": 53.62, + "confidence": 0.261 + }, + { + "text": "it", + "start": 53.62, + "end": 53.76, + "confidence": 0.247 + }, + { + "text": 
"next", + "start": 53.76, + "end": 53.88, + "confidence": 0.404 + }, + { + "text": "day.", + "start": 53.88, + "end": 54.1, + "confidence": 0.266 + } + ] + }, + { + "id": 28, + "seek": 5400, + "start": 54.1, + "end": 55.58, + "text": " Yeah, it's a little bit of a delay on it.", + "tokens": [ + 50364, + 865, + 11, + 309, + 311, + 257, + 707, + 857, + 295, + 257, + 8577, + 322, + 309, + 13, + 50414 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.076, + "words": [ + { + "text": "Yeah,", + "start": 54.1, + "end": 54.16, + "confidence": 0.089 + }, + { + "text": "it's", + "start": 54.24, + "end": 54.48, + "confidence": 0.085 + }, + { + "text": "a", + "start": 54.48, + "end": 54.56, + "confidence": 0.33 + }, + { + "text": "little", + "start": 54.56, + "end": 55.16, + "confidence": 0.003 + }, + { + "text": "bit", + "start": 55.16, + "end": 55.48, + "confidence": 0.367 + }, + { + "text": "of", + "start": 55.48, + "end": 55.5, + "confidence": 0.176 + }, + { + "text": "a", + "start": 55.5, + "end": 55.52, + "confidence": 0.67 + }, + { + "text": "delay", + "start": 55.52, + "end": 55.54, + "confidence": 0.006 + }, + { + "text": "on", + "start": 55.54, + "end": 55.56, + "confidence": 0.019 + }, + { + "text": "it.", + "start": 55.56, + "end": 55.58, + "confidence": 0.098 + } + ] + }, + { + "id": 29, + "seek": 5400, + "start": 55.58, + "end": 56.28, + "text": " Yeah, you know what.", + "tokens": [ + 50414, + 865, + 11, + 291, + 458, + 437, + 13, + 50464 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.051, + "words": [ + { + "text": "Yeah,", + "start": 55.58, + "end": 55.6, + "confidence": 0.023 + }, + { + "text": "you", + "start": 56.0, + "end": 56.02, + "confidence": 0.034 + }, + { + "text": "know", + "start": 56.02, 
+ "end": 56.2, + "confidence": 0.074 + }, + { + "text": "what.", + "start": 56.2, + "end": 56.28, + "confidence": 0.116 + } + ] + }, + { + "id": 30, + "seek": 5400, + "start": 56.46, + "end": 57.52, + "text": " We're gonna hack me on it.", + "tokens": [ + 50464, + 492, + 434, + 799, + 10339, + 385, + 322, + 309, + 13, + 50514 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.017, + "words": [ + { + "text": "We're", + "start": 56.46, + "end": 56.9, + "confidence": 0.106 + }, + { + "text": "gonna", + "start": 56.9, + "end": 57.16, + "confidence": 0.031 + }, + { + "text": "hack", + "start": 57.16, + "end": 57.4, + "confidence": 0.003 + }, + { + "text": "me", + "start": 57.4, + "end": 57.48, + "confidence": 0.0 + }, + { + "text": "on", + "start": 57.48, + "end": 57.5, + "confidence": 0.002 + }, + { + "text": "it.", + "start": 57.5, + "end": 57.52, + "confidence": 0.075 + } + ] + }, + { + "id": 31, + "seek": 5400, + "start": 57.52, + "end": 58.5, + "text": " We're gonna hack you on it.", + "tokens": [ + 50514, + 492, + 434, + 799, + 10339, + 291, + 322, + 309, + 13, + 50564 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.023, + "words": [ + { + "text": "We're", + "start": 57.52, + "end": 58.16, + "confidence": 0.069 + }, + { + "text": "gonna", + "start": 58.16, + "end": 58.3, + "confidence": 0.027 + }, + { + "text": "hack", + "start": 58.3, + "end": 58.34, + "confidence": 0.0 + }, + { + "text": "you", + "start": 58.34, + "end": 58.46, + "confidence": 0.139 + }, + { + "text": "on", + "start": 58.46, + "end": 58.48, + "confidence": 0.021 + }, + { + "text": "it.", + "start": 58.48, + "end": 58.5, + "confidence": 0.112 + } + ] + }, + { + "id": 32, + "seek": 5400, + "start": 58.5, + "end": 59.4, + "text": " We're 
gonna hack you on it.", + "tokens": [ + 50564, + 492, + 434, + 799, + 10339, + 291, + 322, + 309, + 13, + 50614 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.039, + "words": [ + { + "text": "We're", + "start": 58.5, + "end": 58.62, + "confidence": 0.052 + }, + { + "text": "gonna", + "start": 58.62, + "end": 58.74, + "confidence": 0.037 + }, + { + "text": "hack", + "start": 58.74, + "end": 58.76, + "confidence": 0.0 + }, + { + "text": "you", + "start": 58.76, + "end": 58.94, + "confidence": 0.602 + }, + { + "text": "on", + "start": 58.94, + "end": 59.16, + "confidence": 0.227 + }, + { + "text": "it.", + "start": 59.16, + "end": 59.4, + "confidence": 0.105 + } + ] + }, + { + "id": 33, + "seek": 5400, + "start": 59.4, + "end": 60.04, + "text": " We're gonna hack you on it.", + "tokens": [ + 50614, + 492, + 434, + 799, + 10339, + 291, + 322, + 309, + 13, + 50664 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.023, + "words": [ + { + "text": "We're", + "start": 59.4, + "end": 59.58, + "confidence": 0.047 + }, + { + "text": "gonna", + "start": 59.58, + "end": 59.6, + "confidence": 0.113 + }, + { + "text": "hack", + "start": 59.6, + "end": 59.7, + "confidence": 0.0 + }, + { + "text": "you", + "start": 59.7, + "end": 59.9, + "confidence": 0.091 + }, + { + "text": "on", + "start": 59.9, + "end": 59.94, + "confidence": 0.008 + }, + { + "text": "it.", + "start": 59.94, + "end": 60.04, + "confidence": 0.099 + } + ] + }, + { + "id": 34, + "seek": 5400, + "start": 60.04, + "end": 61.82, + "text": " With a cover, I tried it already.", + "tokens": [ + 50664, + 2022, + 257, + 2060, + 11, + 286, + 3031, + 309, + 1217, + 13, + 50764 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + 
"compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.189, + "words": [ + { + "text": "With", + "start": 60.04, + "end": 60.16, + "confidence": 0.071 + }, + { + "text": "a", + "start": 60.16, + "end": 60.8, + "confidence": 0.572 + }, + { + "text": "cover,", + "start": 60.8, + "end": 61.02, + "confidence": 0.064 + }, + { + "text": "I", + "start": 61.16, + "end": 61.28, + "confidence": 0.884 + }, + { + "text": "tried", + "start": 61.28, + "end": 61.44, + "confidence": 0.009 + }, + { + "text": "it", + "start": 61.44, + "end": 61.64, + "confidence": 0.364 + }, + { + "text": "already.", + "start": 61.64, + "end": 61.82, + "confidence": 0.8 + } + ] + }, + { + "id": 35, + "seek": 5400, + "start": 62.52, + "end": 63.06, + "text": " Okay, fine.", + "tokens": [ + 50764, + 1033, + 11, + 2489, + 13, + 50814 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.473, + "words": [ + { + "text": "Okay,", + "start": 62.52, + "end": 62.76, + "confidence": 0.383 + }, + { + "text": "fine.", + "start": 62.88, + "end": 63.06, + "confidence": 0.584 + } + ] + }, + { + "id": 36, + "seek": 5400, + "start": 63.06, + "end": 65.12, + "text": " We weren't sure of that, uh, just the suggestion.", + "tokens": [ + 50814, + 492, + 4999, + 380, + 988, + 295, + 300, + 11, + 2232, + 11, + 445, + 264, + 16541, + 13, + 50914 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.478, + "words": [ + { + "text": "We", + "start": 63.06, + "end": 63.26, + "confidence": 0.274 + }, + { + "text": "weren't", + "start": 63.26, + "end": 63.58, + "confidence": 0.918 + }, + { + "text": "sure", + "start": 63.58, + "end": 63.72, + "confidence": 0.976 + }, + { + "text": "of", + "start": 63.72, + "end": 63.86, + "confidence": 
0.438 + }, + { + "text": "that,", + "start": 63.86, + "end": 64.0, + "confidence": 0.58 + }, + { + "text": "uh,", + "start": 64.14, + "end": 64.3, + "confidence": 0.318 + }, + { + "text": "just", + "start": 64.34, + "end": 64.58, + "confidence": 0.401 + }, + { + "text": "the", + "start": 64.58, + "end": 64.78, + "confidence": 0.6 + }, + { + "text": "suggestion.", + "start": 64.78, + "end": 65.12, + "confidence": 0.291 + } + ] + }, + { + "id": 37, + "seek": 5400, + "start": 65.12, + "end": 67.7, + "text": " We thought we'd, uh, say you could check it out.", + "tokens": [ + 50914, + 492, + 1194, + 321, + 1116, + 11, + 2232, + 11, + 584, + 291, + 727, + 1520, + 309, + 484, + 13, + 51064 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.532, + "words": [ + { + "text": "We", + "start": 65.12, + "end": 65.32, + "confidence": 0.134 + }, + { + "text": "thought", + "start": 65.32, + "end": 65.56, + "confidence": 0.834 + }, + { + "text": "we'd,", + "start": 65.56, + "end": 65.9, + "confidence": 0.38 + }, + { + "text": "uh,", + "start": 65.96, + "end": 66.02, + "confidence": 0.663 + }, + { + "text": "say", + "start": 66.76, + "end": 66.94, + "confidence": 0.176 + }, + { + "text": "you", + "start": 66.94, + "end": 67.08, + "confidence": 0.828 + }, + { + "text": "could", + "start": 67.08, + "end": 67.22, + "confidence": 0.7 + }, + { + "text": "check", + "start": 67.22, + "end": 67.38, + "confidence": 0.65 + }, + { + "text": "it", + "start": 67.38, + "end": 67.56, + "confidence": 0.979 + }, + { + "text": "out.", + "start": 67.56, + "end": 67.7, + "confidence": 0.91 + } + ] + }, + { + "id": 38, + "seek": 5400, + "start": 68.14, + "end": 71.9, + "text": " It's not much of a hard to turn on, so, uh, I guess, uh, we're gonna come up with it.", + "tokens": [ + 51064, + 467, + 311, + 406, + 709, + 295, + 257, + 1152, + 281, + 1261, + 322, + 11, + 370, + 
11, + 2232, + 11, + 286, + 2041, + 11, + 2232, + 11, + 321, + 434, + 799, + 808, + 493, + 365, + 309, + 13, + 51264 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.306, + "words": [ + { + "text": "It's", + "start": 68.14, + "end": 68.3, + "confidence": 0.098 + }, + { + "text": "not", + "start": 68.3, + "end": 68.4, + "confidence": 0.375 + }, + { + "text": "much", + "start": 68.4, + "end": 68.58, + "confidence": 0.863 + }, + { + "text": "of", + "start": 68.58, + "end": 68.72, + "confidence": 0.227 + }, + { + "text": "a", + "start": 68.72, + "end": 68.86, + "confidence": 0.339 + }, + { + "text": "hard", + "start": 68.86, + "end": 68.88, + "confidence": 0.014 + }, + { + "text": "to", + "start": 68.88, + "end": 68.98, + "confidence": 0.073 + }, + { + "text": "turn", + "start": 68.98, + "end": 69.1, + "confidence": 0.767 + }, + { + "text": "on,", + "start": 69.1, + "end": 69.24, + "confidence": 0.101 + }, + { + "text": "so,", + "start": 69.44, + "end": 69.58, + "confidence": 0.389 + }, + { + "text": "uh,", + "start": 69.7, + "end": 69.84, + "confidence": 0.5 + }, + { + "text": "I", + "start": 70.3, + "end": 70.5, + "confidence": 0.72 + }, + { + "text": "guess,", + "start": 70.5, + "end": 70.66, + "confidence": 0.731 + }, + { + "text": "uh,", + "start": 70.74, + "end": 70.9, + "confidence": 0.969 + }, + { + "text": "we're", + "start": 70.92, + "end": 71.1, + "confidence": 0.614 + }, + { + "text": "gonna", + "start": 71.1, + "end": 71.24, + "confidence": 0.481 + }, + { + "text": "come", + "start": 71.24, + "end": 71.42, + "confidence": 0.84 + }, + { + "text": "up", + "start": 71.42, + "end": 71.6, + "confidence": 0.84 + }, + { + "text": "with", + "start": 71.6, + "end": 71.76, + "confidence": 0.407 + }, + { + "text": "it.", + "start": 71.76, + "end": 71.9, + "confidence": 0.054 + } + ] + }, + { + "id": 39, + "seek": 5400, + "start": 71.9, + 
"end": 72.46, + "text": " Let us know.", + "tokens": [ + 51264, + 961, + 505, + 458, + 13, + 51314 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.313, + "words": [ + { + "text": "Let", + "start": 71.9, + "end": 72.08, + "confidence": 0.035 + }, + { + "text": "us", + "start": 72.08, + "end": 72.24, + "confidence": 0.466 + }, + { + "text": "know.", + "start": 72.24, + "end": 72.46, + "confidence": 0.772 + } + ] + }, + { + "id": 40, + "seek": 5400, + "start": 72.5, + "end": 72.7, + "text": " Okay, no problem.", + "tokens": [ + 51314, + 1033, + 11, + 572, + 1154, + 13, + 51364 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.04, + "words": [ + { + "text": "Okay,", + "start": 72.5, + "end": 72.52, + "confidence": 0.02 + }, + { + "text": "no", + "start": 72.52, + "end": 72.56, + "confidence": 0.003 + }, + { + "text": "problem.", + "start": 72.56, + "end": 72.7, + "confidence": 0.299 + } + ] + }, + { + "id": 41, + "seek": 5400, + "start": 73.68, + "end": 75.2, + "text": " Okay, no problem.", + "tokens": [ + 51364, + 1033, + 11, + 572, + 1154, + 13, + 51414 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.175, + "words": [ + { + "text": "Okay,", + "start": 73.68, + "end": 74.48, + "confidence": 0.044 + }, + { + "text": "no", + "start": 74.64, + "end": 74.86, + "confidence": 0.126 + }, + { + "text": "problem.", + "start": 74.86, + "end": 75.2, + "confidence": 0.813 + } + ] + }, + { + "id": 42, + "seek": 5400, + "start": 76.74, + "end": 77.48, + "text": " We're gonna let you know when the end of...", + "tokens": [ + 51414, + 492, + 434, + 799, + 718, + 291, + 458, + 562, + 264, + 917, 
+ 295, + 485, + 51514 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.056, + "words": [ + { + "text": "We're", + "start": 76.74, + "end": 76.82, + "confidence": 0.04 + }, + { + "text": "gonna", + "start": 76.82, + "end": 76.84, + "confidence": 0.047 + }, + { + "text": "let", + "start": 76.84, + "end": 76.86, + "confidence": 0.01 + }, + { + "text": "you", + "start": 76.86, + "end": 77.14, + "confidence": 0.16 + }, + { + "text": "know", + "start": 77.14, + "end": 77.18, + "confidence": 0.642 + }, + { + "text": "when", + "start": 77.18, + "end": 77.34, + "confidence": 0.194 + }, + { + "text": "the", + "start": 77.34, + "end": 77.4, + "confidence": 0.053 + }, + { + "text": "end", + "start": 77.4, + "end": 77.46, + "confidence": 0.005 + }, + { + "text": "of...", + "start": 77.46, + "end": 77.48, + "confidence": 0.062 + } + ] + }, + { + "id": 43, + "seek": 5400, + "start": 78.04, + "end": 78.22, + "text": " None.", + "tokens": [ + 51514, + 14492, + 13, + 51564 + ], + "temperature": 0.6000000000000001, + "avg_logprob": -0.5010345458984375, + "compression_ratio": 1.9803149606299213, + "no_speech_prob": 0.2010815143585205, + "confidence": 0.02, + "words": [ + { + "text": "None.", + "start": 78.04, + "end": 78.22, + "confidence": 0.02 + } + ] + }, + { + "id": 44, + "seek": 7800, + "start": 78.22, + "end": 78.76, + "text": " I'm glad you came.", + "tokens": [ + 50364, + 286, + 478, + 5404, + 291, + 1361, + 13, + 50589 + ], + "temperature": 1.0, + "avg_logprob": -1.8937584661668347, + "compression_ratio": 1.1833333333333333, + "no_speech_prob": 0.2503969073295593, + "confidence": 0.072, + "words": [ + { + "text": "I'm", + "start": 78.22, + "end": 78.26, + "confidence": 0.042 + }, + { + "text": "glad", + "start": 78.26, + "end": 78.28, + "confidence": 0.019 + }, + { + "text": "you", + "start": 78.28, + "end": 78.68, + "confidence": 0.196 + }, + { 
+ "text": "came.", + "start": 78.68, + "end": 78.76, + "confidence": 0.147 + } + ] + }, + { + "id": 45, + "seek": 7800, + "start": 84.96, + "end": 85.88, + "text": " Everything's OK.", + "tokens": [ + 50724, + 5471, + 311, + 2264, + 13, + 50774 + ], + "temperature": 1.0, + "avg_logprob": -1.8937584661668347, + "compression_ratio": 1.1833333333333333, + "no_speech_prob": 0.2503969073295593, + "confidence": 0.028, + "words": [ + { + "text": "Everything's", + "start": 84.96, + "end": 85.86, + "confidence": 0.007 + }, + { + "text": "OK.", + "start": 85.86, + "end": 85.88, + "confidence": 0.118 + } + ] + }, + { + "id": 46, + "seek": 7800, + "start": 86.5, + "end": 87.32, + "text": " Everything's OK.", + "tokens": [ + 50799, + 5471, + 311, + 2264, + 13, + 50849 + ], + "temperature": 1.0, + "avg_logprob": -1.8937584661668347, + "compression_ratio": 1.1833333333333333, + "no_speech_prob": 0.2503969073295593, + "confidence": 0.048, + "words": [ + { + "text": "Everything's", + "start": 86.5, + "end": 87.14, + "confidence": 0.015 + }, + { + "text": "OK.", + "start": 87.14, + "end": 87.32, + "confidence": 0.154 + } + ] + }, + { + "id": 47, + "seek": 7800, + "start": 87.32, + "end": 87.58, + "text": " Yeah.", + "tokens": [ + 50849, + 865, + 13, + 50899 + ], + "temperature": 1.0, + "avg_logprob": -1.8937584661668347, + "compression_ratio": 1.1833333333333333, + "no_speech_prob": 0.2503969073295593, + "confidence": 0.064, + "words": [ + { + "text": "Yeah.", + "start": 87.32, + "end": 87.58, + "confidence": 0.064 + } + ] + }, + { + "id": 48, + "seek": 7800, + "start": 88.2, + "end": 88.24, + "text": " You're safe.", + "tokens": [ + 50899, + 509, + 434, + 3273, + 13, + 50949 + ], + "temperature": 1.0, + "avg_logprob": -1.8937584661668347, + "compression_ratio": 1.1833333333333333, + "no_speech_prob": 0.2503969073295593, + "confidence": 0.017, + "words": [ + { + "text": "You're", + "start": 88.2, + "end": 88.22, + "confidence": 0.009 + }, + { + "text": "safe.", + "start": 88.22, + 
"end": 88.24, + "confidence": 0.03 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive/naive_apollo11.mp3.words.json b/tests/expected/naive/naive_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..a08385826f3296742d8adc2bb81ef4301d4f5f81 --- /dev/null +++ b/tests/expected/naive/naive_apollo11.mp3.words.json @@ -0,0 +1,3820 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got 
them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.54, + "end": 6.56, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.7220700916491056, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.495, + "words": [ + { + "text": "Apollo", + "start": 0.54, + "end": 0.92, + "confidence": 0.425 + }, + { + "text": "11,", + "start": 0.92, + "end": 1.26, + "confidence": 0.842 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.72, + "confidence": 0.974 + }, + { + "text": "we", + "start": 1.72, + "end": 1.92, + "confidence": 0.453 + }, + { + "text": "got", + "start": 1.92, + "end": 2.1, + "confidence": 0.791 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.992 + }, + { + "text": "recommendation", + 
"start": 2.26, + "end": 2.86, + "confidence": 0.97 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.944 + }, + { + "text": "you", + "start": 3.44, + "end": 3.6, + "confidence": 0.99 + }, + { + "text": "on", + "start": 3.6, + "end": 3.72, + "confidence": 0.935 + }, + { + "text": "your", + "start": 3.72, + "end": 3.92, + "confidence": 0.974 + }, + { + "text": "Soyuz-VA", + "start": 3.92, + "end": 5.16, + "confidence": 0.327 + }, + { + "text": "GLEME", + "start": 5.16, + "end": 5.72, + "confidence": 0.179 + }, + { + "text": "GVA.", + "start": 5.72, + "end": 6.56, + "confidence": 0.321 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 19.08, + "text": " Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 50714, + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.7220700916491056, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.311, + "words": [ + { + "text": "Alright,", + "start": 10.8, + "end": 10.82, + "confidence": 0.054 + }, + { + "text": "okay,", + "start": 11.46, + "end": 12.2, + "confidence": 0.359 + }, + { + "text": "we", + "start": 12.44, + "end": 12.96, + "confidence": 0.347 + }, + { + "text": "like", + "start": 12.96, + "end": 13.28, + "confidence": 0.512 + }, + { + "text": "to", + "start": 13.28, + "end": 13.6, + "confidence": 0.156 + }, + { + "text": "say", + "start": 13.6, + "end": 14.9, + "confidence": 0.086 + }, + { + "text": "that", + "start": 14.9, + "end": 15.32, + "confidence": 0.274 + }, + { + "text": "they", + "start": 15.32, + "end": 15.46, + "confidence": 0.358 + }, + { + "text": "make", + "start": 15.46, + "end": 15.7, + "confidence": 0.127 + }, + { + "text": "the", + "start": 15.7, + "end": 
15.86, + "confidence": 0.165 + }, + { + "text": "one", + "start": 15.86, + "end": 16.08, + "confidence": 0.427 + }, + { + "text": "that's", + "start": 16.08, + "end": 16.26, + "confidence": 0.357 + }, + { + "text": "on", + "start": 16.26, + "end": 16.46, + "confidence": 0.314 + }, + { + "text": "the", + "start": 16.46, + "end": 16.76, + "confidence": 0.499 + }, + { + "text": "helmet", + "start": 16.76, + "end": 17.26, + "confidence": 0.307 + }, + { + "text": "we're", + "start": 17.26, + "end": 17.78, + "confidence": 0.256 + }, + { + "text": "going", + "start": 17.78, + "end": 17.94, + "confidence": 0.478 + }, + { + "text": "to", + "start": 17.94, + "end": 18.1, + "confidence": 0.522 + }, + { + "text": "have", + "start": 18.1, + "end": 18.18, + "confidence": 0.804 + }, + { + "text": "in", + "start": 18.18, + "end": 18.36, + "confidence": 0.772 + }, + { + "text": "B1.", + "start": 18.36, + "end": 19.08, + "confidence": 0.738 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 19.08, + "end": 24.54, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 51314, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13, + 51614 + ], + "temperature": 0.0, + "avg_logprob": -0.7220700916491056, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993358850479126, + "confidence": 0.129, + "words": [ + { + "text": "And", + "start": 19.08, + "end": 20.14, + "confidence": 0.232 + }, + { + "text": "you", + "start": 20.14, + "end": 20.32, + "confidence": 0.905 + }, + { + "text": "can", + "start": 20.32, + "end": 20.5, + "confidence": 0.422 + }, + { + "text": "put", + "start": 20.5, + "end": 20.64, + "confidence": 0.942 + }, + { + "text": "the", + "start": 20.64, + "end": 20.82, + "confidence": 0.983 + }, + { + "text": "other", + "start": 20.82, + "end": 21.0, + "confidence": 0.994 + }, + { + "text": "one", + "start": 21.0, + "end": 
21.18, + "confidence": 0.965 + }, + { + "text": "on", + "start": 21.18, + "end": 21.36, + "confidence": 0.965 + }, + { + "text": "the", + "start": 21.36, + "end": 21.78, + "confidence": 0.187 + }, + { + "text": "mic", + "start": 21.78, + "end": 22.48, + "confidence": 0.587 + }, + { + "text": "helmet", + "start": 22.48, + "end": 22.82, + "confidence": 0.029 + }, + { + "text": "with", + "start": 22.82, + "end": 23.06, + "confidence": 0.047 + }, + { + "text": "those", + "start": 23.06, + "end": 23.3, + "confidence": 0.036 + }, + { + "text": "GVA", + "start": 23.3, + "end": 23.74, + "confidence": 0.001 + }, + { + "text": "blizzard", + "start": 23.74, + "end": 24.18, + "confidence": 0.022 + }, + { + "text": "frames.", + "start": 24.18, + "end": 24.54, + "confidence": 0.225 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 31.34, + "end": 54.52, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.11161944071451822, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.42649850249290466, + "confidence": 0.885, + "words": [ + { + "text": "Alright,", + "start": 31.34, + "end": 31.52, + "confidence": 0.069 + }, + { + "text": "got", + "start": 31.82, + "end": 31.86, + "confidence": 0.144 + }, + { + "text": "them,", + "start": 31.86, + "end": 32.1, + "confidence": 0.194 + }, + { + "text": "got", + "start": 32.38, + "end": 32.88, + "confidence": 0.473 + }, + { + "text": "them,", + "start": 32.88, + "end": 33.48, + "confidence": 0.611 + }, + { + "text": "got", + "start": 33.5, + "end": 33.52, + "confidence": 0.574 + }, + { + "text": "them,", + "start": 33.52, + "end": 33.54, + "confidence": 0.51 + }, + { + "text": "got", + "start": 33.76, + "end": 33.78, + "confidence": 0.6 + }, + { + "text": 
"them,", + "start": 33.78, + "end": 33.88, + "confidence": 0.488 + }, + { + "text": "got", + "start": 34.12, + "end": 34.14, + "confidence": 0.724 + }, + { + "text": "them,", + "start": 34.14, + "end": 34.16, + "confidence": 0.567 + }, + { + "text": "got", + "start": 34.16, + "end": 34.18, + "confidence": 0.761 + }, + { + "text": "them,", + "start": 34.18, + "end": 34.34, + "confidence": 0.689 + }, + { + "text": "got", + "start": 34.34, + "end": 34.36, + "confidence": 0.851 + }, + { + "text": "them,", + "start": 34.36, + "end": 34.5, + "confidence": 0.813 + }, + { + "text": "got", + "start": 34.5, + "end": 34.64, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 34.64, + "end": 34.66, + "confidence": 0.756 + }, + { + "text": "got", + "start": 34.82, + "end": 34.92, + "confidence": 0.467 + }, + { + "text": "them,", + "start": 34.92, + "end": 35.0, + "confidence": 0.851 + }, + { + "text": "got", + "start": 35.0, + "end": 35.02, + "confidence": 0.917 + }, + { + "text": "them,", + "start": 35.02, + "end": 35.1, + "confidence": 0.897 + }, + { + "text": "got", + "start": 35.1, + "end": 35.28, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 35.28, + "end": 35.64, + "confidence": 0.911 + }, + { + "text": "got", + "start": 35.66, + "end": 35.68, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 35.68, + "end": 35.7, + "confidence": 0.917 + }, + { + "text": "got", + "start": 35.74, + "end": 35.76, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 35.76, + "end": 35.78, + "confidence": 0.915 + }, + { + "text": "got", + "start": 36.04, + "end": 36.36, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.36, + "end": 36.68, + "confidence": 0.91 + }, + { + "text": "got", + "start": 36.68, + "end": 36.7, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 36.7, + "end": 36.72, + "confidence": 0.916 + }, + { + "text": "got", + "start": 36.72, + "end": 36.74, + "confidence": 0.926 + }, + { + "text": "them,", + 
"start": 36.74, + "end": 36.76, + "confidence": 0.92 + }, + { + "text": "got", + "start": 36.76, + "end": 36.78, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 36.78, + "end": 36.8, + "confidence": 0.924 + }, + { + "text": "got", + "start": 36.8, + "end": 36.82, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 36.82, + "end": 36.84, + "confidence": 0.93 + }, + { + "text": "got", + "start": 36.84, + "end": 36.86, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 36.86, + "end": 36.88, + "confidence": 0.937 + }, + { + "text": "got", + "start": 36.9, + "end": 37.46, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 37.46, + "end": 37.82, + "confidence": 0.942 + }, + { + "text": "got", + "start": 37.82, + "end": 37.84, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 37.84, + "end": 38.12, + "confidence": 0.945 + }, + { + "text": "got", + "start": 38.12, + "end": 38.14, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 38.14, + "end": 38.16, + "confidence": 0.948 + }, + { + "text": "got", + "start": 38.16, + "end": 38.18, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 38.18, + "end": 38.2, + "confidence": 0.951 + }, + { + "text": "got", + "start": 38.2, + "end": 38.22, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 38.22, + "end": 38.54, + "confidence": 0.953 + }, + { + "text": "got", + "start": 38.54, + "end": 38.56, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 38.56, + "end": 38.58, + "confidence": 0.956 + }, + { + "text": "got", + "start": 38.58, + "end": 38.6, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 38.6, + "end": 38.62, + "confidence": 0.956 + }, + { + "text": "got", + "start": 38.62, + "end": 38.64, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 38.64, + "end": 38.66, + "confidence": 0.958 + }, + { + "text": "got", + "start": 38.66, + "end": 38.68, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 38.68, + 
"end": 38.7, + "confidence": 0.961 + }, + { + "text": "got", + "start": 38.7, + "end": 38.72, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 38.72, + "end": 38.74, + "confidence": 0.961 + }, + { + "text": "got", + "start": 38.74, + "end": 38.76, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 38.76, + "end": 38.78, + "confidence": 0.962 + }, + { + "text": "got", + "start": 38.78, + "end": 38.8, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 38.8, + "end": 38.82, + "confidence": 0.964 + }, + { + "text": "got", + "start": 38.82, + "end": 38.84, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 38.84, + "end": 38.86, + "confidence": 0.964 + }, + { + "text": "got", + "start": 38.86, + "end": 38.88, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 38.88, + "end": 38.9, + "confidence": 0.965 + }, + { + "text": "got", + "start": 38.9, + "end": 38.92, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.92, + "end": 38.94, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.94, + "end": 38.96, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 38.96, + "end": 38.98, + "confidence": 0.969 + }, + { + "text": "got", + "start": 38.98, + "end": 39.0, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 39.0, + "end": 39.02, + "confidence": 0.968 + }, + { + "text": "got", + "start": 39.02, + "end": 39.04, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 39.04, + "end": 39.06, + "confidence": 0.971 + }, + { + "text": "got", + "start": 39.06, + "end": 39.08, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 39.08, + "end": 39.1, + "confidence": 0.971 + }, + { + "text": "got", + "start": 39.1, + "end": 39.12, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 39.12, + "end": 39.14, + "confidence": 0.974 + }, + { + "text": "got", + "start": 39.14, + "end": 39.16, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 39.16, + "end": 39.18, + 
"confidence": 0.974 + }, + { + "text": "got", + "start": 39.18, + "end": 39.2, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 39.2, + "end": 39.22, + "confidence": 0.975 + }, + { + "text": "got", + "start": 39.22, + "end": 39.24, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 39.24, + "end": 39.26, + "confidence": 0.976 + }, + { + "text": "got", + "start": 39.26, + "end": 39.28, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 39.28, + "end": 39.3, + "confidence": 0.978 + }, + { + "text": "got", + "start": 39.3, + "end": 39.32, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 39.32, + "end": 39.34, + "confidence": 0.979 + }, + { + "text": "got", + "start": 39.34, + "end": 39.36, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 39.36, + "end": 39.38, + "confidence": 0.979 + }, + { + "text": "got", + "start": 39.38, + "end": 39.4, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 39.4, + "end": 39.42, + "confidence": 0.98 + }, + { + "text": "got", + "start": 39.42, + "end": 39.44, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 39.44, + "end": 39.46, + "confidence": 0.981 + }, + { + "text": "got", + "start": 39.46, + "end": 39.48, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.48, + "end": 39.5, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.5, + "end": 39.52, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.52, + "end": 39.54, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.54, + "end": 39.56, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.56, + "end": 39.58, + "confidence": 0.984 + }, + { + "text": "got", + "start": 39.58, + "end": 39.6, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.6, + "end": 39.62, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.62, + "end": 39.64, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.64, + "end": 39.66, + "confidence": 0.984 
+ }, + { + "text": "got", + "start": 39.66, + "end": 39.68, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.68, + "end": 39.7, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.7, + "end": 39.72, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.72, + "end": 39.74, + "confidence": 0.986 + }, + { + "text": "got", + "start": 39.74, + "end": 39.76, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.76, + "end": 39.78, + "confidence": 0.986 + }, + { + "text": "got", + "start": 39.78, + "end": 39.8, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 39.8, + "end": 39.82, + "confidence": 0.986 + }, + { + "text": "got", + "start": 39.82, + "end": 39.84, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 39.84, + "end": 39.86, + "confidence": 0.986 + }, + { + "text": "got", + "start": 39.86, + "end": 39.88, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 39.88, + "end": 39.9, + "confidence": 0.987 + }, + { + "text": "got", + "start": 39.9, + "end": 39.92, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 39.92, + "end": 39.94, + "confidence": 0.987 + }, + { + "text": "got", + "start": 39.94, + "end": 39.96, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 39.96, + "end": 39.98, + "confidence": 0.988 + }, + { + "text": "got", + "start": 39.98, + "end": 40.0, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.0, + "end": 40.02, + "confidence": 0.988 + }, + { + "text": "got", + "start": 40.02, + "end": 40.42, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.42, + "end": 40.68, + "confidence": 0.988 + }, + { + "text": "got", + "start": 41.04, + "end": 41.06, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 41.06, + "end": 41.54, + "confidence": 0.988 + }, + { + "text": "got", + "start": 41.88, + "end": 41.9, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 41.9, + "end": 42.48, + "confidence": 0.987 + }, + { + "text": 
"got", + "start": 42.68, + "end": 42.7, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 42.7, + "end": 43.0, + "confidence": 0.989 + }, + { + "text": "got", + "start": 43.54, + "end": 44.06, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 44.06, + "end": 45.16, + "confidence": 0.989 + }, + { + "text": "got", + "start": 45.16, + "end": 45.46, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 45.46, + "end": 45.8, + "confidence": 0.99 + }, + { + "text": "got", + "start": 45.82, + "end": 46.22, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.22, + "end": 46.38, + "confidence": 0.989 + }, + { + "text": "got", + "start": 46.6, + "end": 47.0, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 47.0, + "end": 47.76, + "confidence": 0.99 + }, + { + "text": "got", + "start": 48.18, + "end": 48.46, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 48.46, + "end": 48.9, + "confidence": 0.99 + }, + { + "text": "got", + "start": 48.9, + "end": 49.2, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 49.2, + "end": 49.84, + "confidence": 0.99 + }, + { + "text": "got", + "start": 50.36, + "end": 50.38, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 50.38, + "end": 50.76, + "confidence": 0.99 + }, + { + "text": "got", + "start": 51.5, + "end": 51.74, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 51.74, + "end": 52.34, + "confidence": 0.99 + }, + { + "text": "got", + "start": 52.44, + "end": 53.28, + "confidence": 0.994 + }, + { + "text": "them", + "start": 53.28, + "end": 54.52, + "confidence": 0.996 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 54.52, + "end": 84.46, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got 
them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": 
-0.05313938722482177, + "compression_ratio": 29.52, + "no_speech_prob": 0.24551986157894135, + "confidence": 0.844, + "words": [ + { + "text": "got", + "start": 54.52, + "end": 54.82, + "confidence": 0.002 + }, + { + "text": "them,", + "start": 54.82, + "end": 54.84, + "confidence": 0.016 + }, + { + "text": "got", + "start": 54.84, + "end": 55.14, + "confidence": 0.038 + }, + { + "text": "them,", + "start": 55.14, + "end": 55.56, + "confidence": 0.421 + }, + { + "text": "got", + "start": 55.56, + "end": 56.58, + "confidence": 0.645 + }, + { + "text": "them,", + "start": 56.58, + "end": 56.6, + "confidence": 0.595 + }, + { + "text": "got", + "start": 56.6, + "end": 57.36, + "confidence": 0.78 + }, + { + "text": "them,", + "start": 57.36, + "end": 57.52, + "confidence": 0.639 + }, + { + "text": "got", + "start": 57.76, + "end": 58.22, + "confidence": 0.846 + }, + { + "text": "them,", + "start": 58.22, + "end": 58.26, + "confidence": 0.668 + }, + { + "text": "got", + "start": 58.74, + "end": 58.78, + "confidence": 0.876 + }, + { + "text": "them,", + "start": 58.78, + "end": 59.46, + "confidence": 0.672 + }, + { + "text": "got", + "start": 59.88, + "end": 59.9, + "confidence": 0.836 + }, + { + "text": "them,", + "start": 59.9, + "end": 60.32, + "confidence": 0.74 + }, + { + "text": "got", + "start": 60.52, + "end": 60.6, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 60.6, + "end": 60.7, + "confidence": 0.814 + }, + { + "text": "got", + "start": 60.8, + "end": 60.98, + "confidence": 0.911 + }, + { + "text": "them,", + "start": 60.98, + "end": 61.78, + "confidence": 0.566 + }, + { + "text": "got", + "start": 61.78, + "end": 61.8, + "confidence": 0.122 + }, + { + "text": "them,", + "start": 61.8, + "end": 61.82, + "confidence": 0.748 + }, + { + "text": "got", + "start": 61.82, + "end": 61.84, + "confidence": 0.75 + }, + { + "text": "them,", + "start": 61.84, + "end": 61.86, + "confidence": 0.818 + }, + { + "text": "got", + "start": 61.86, + "end": 61.88, + 
"confidence": 0.797 + }, + { + "text": "them,", + "start": 61.88, + "end": 61.9, + "confidence": 0.841 + }, + { + "text": "got", + "start": 61.9, + "end": 61.92, + "confidence": 0.81 + }, + { + "text": "them,", + "start": 61.92, + "end": 61.94, + "confidence": 0.864 + }, + { + "text": "got", + "start": 61.94, + "end": 61.96, + "confidence": 0.846 + }, + { + "text": "them,", + "start": 61.96, + "end": 61.98, + "confidence": 0.893 + }, + { + "text": "got", + "start": 61.98, + "end": 62.0, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 62.0, + "end": 62.02, + "confidence": 0.91 + }, + { + "text": "got", + "start": 62.02, + "end": 62.04, + "confidence": 0.896 + }, + { + "text": "them,", + "start": 62.04, + "end": 62.06, + "confidence": 0.912 + }, + { + "text": "got", + "start": 62.06, + "end": 62.08, + "confidence": 0.877 + }, + { + "text": "them,", + "start": 62.08, + "end": 62.1, + "confidence": 0.923 + }, + { + "text": "got", + "start": 62.1, + "end": 62.12, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 62.12, + "end": 62.14, + "confidence": 0.932 + }, + { + "text": "got", + "start": 62.14, + "end": 62.16, + "confidence": 0.896 + }, + { + "text": "them,", + "start": 62.16, + "end": 62.18, + "confidence": 0.94 + }, + { + "text": "got", + "start": 62.5, + "end": 62.72, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 62.72, + "end": 62.74, + "confidence": 0.945 + }, + { + "text": "got", + "start": 62.74, + "end": 62.76, + "confidence": 0.912 + }, + { + "text": "them,", + "start": 62.76, + "end": 62.78, + "confidence": 0.95 + }, + { + "text": "got", + "start": 62.78, + "end": 62.8, + "confidence": 0.917 + }, + { + "text": "them,", + "start": 62.8, + "end": 62.82, + "confidence": 0.954 + }, + { + "text": "got", + "start": 62.82, + "end": 62.84, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 62.84, + "end": 62.86, + "confidence": 0.957 + }, + { + "text": "got", + "start": 62.86, + "end": 62.88, + "confidence": 0.929 
+ }, + { + "text": "them,", + "start": 62.88, + "end": 62.9, + "confidence": 0.959 + }, + { + "text": "got", + "start": 62.9, + "end": 62.92, + "confidence": 0.933 + }, + { + "text": "them,", + "start": 62.92, + "end": 62.94, + "confidence": 0.962 + }, + { + "text": "got", + "start": 62.94, + "end": 62.96, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 62.96, + "end": 62.98, + "confidence": 0.965 + }, + { + "text": "got", + "start": 62.98, + "end": 63.0, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 63.0, + "end": 63.02, + "confidence": 0.965 + }, + { + "text": "got", + "start": 63.02, + "end": 63.04, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 63.04, + "end": 63.06, + "confidence": 0.966 + }, + { + "text": "got", + "start": 63.06, + "end": 63.08, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 63.08, + "end": 63.1, + "confidence": 0.967 + }, + { + "text": "got", + "start": 63.1, + "end": 63.12, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 63.12, + "end": 63.14, + "confidence": 0.969 + }, + { + "text": "got", + "start": 63.14, + "end": 63.16, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 63.16, + "end": 63.18, + "confidence": 0.969 + }, + { + "text": "got", + "start": 63.18, + "end": 63.2, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 63.2, + "end": 63.22, + "confidence": 0.97 + }, + { + "text": "got", + "start": 63.22, + "end": 63.24, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 63.24, + "end": 63.26, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.26, + "end": 63.28, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 63.28, + "end": 63.3, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.3, + "end": 63.32, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 63.32, + "end": 63.34, + "confidence": 0.972 + }, + { + "text": "got", + "start": 63.34, + "end": 63.36, + "confidence": 0.949 + }, + { + "text": 
"them,", + "start": 63.36, + "end": 63.38, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.38, + "end": 63.4, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 63.4, + "end": 63.42, + "confidence": 0.973 + }, + { + "text": "got", + "start": 63.42, + "end": 63.44, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 63.44, + "end": 63.46, + "confidence": 0.973 + }, + { + "text": "got", + "start": 63.46, + "end": 63.48, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 63.48, + "end": 63.5, + "confidence": 0.974 + }, + { + "text": "got", + "start": 63.5, + "end": 63.52, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 63.52, + "end": 63.54, + "confidence": 0.974 + }, + { + "text": "got", + "start": 63.54, + "end": 63.56, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 63.56, + "end": 63.58, + "confidence": 0.975 + }, + { + "text": "got", + "start": 63.58, + "end": 63.6, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 63.6, + "end": 63.62, + "confidence": 0.975 + }, + { + "text": "got", + "start": 63.62, + "end": 63.64, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 63.64, + "end": 63.66, + "confidence": 0.975 + }, + { + "text": "got", + "start": 63.66, + "end": 63.68, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 63.68, + "end": 63.7, + "confidence": 0.976 + }, + { + "text": "got", + "start": 63.7, + "end": 63.72, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 63.72, + "end": 63.74, + "confidence": 0.976 + }, + { + "text": "got", + "start": 63.74, + "end": 63.76, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 63.76, + "end": 63.78, + "confidence": 0.977 + }, + { + "text": "got", + "start": 63.78, + "end": 63.8, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 63.8, + "end": 63.82, + "confidence": 0.977 + }, + { + "text": "got", + "start": 63.82, + "end": 63.84, + "confidence": 0.971 + }, + { + "text": "them,", + 
"start": 63.84, + "end": 63.86, + "confidence": 0.977 + }, + { + "text": "got", + "start": 63.86, + "end": 63.88, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 63.88, + "end": 63.9, + "confidence": 0.977 + }, + { + "text": "got", + "start": 63.9, + "end": 63.92, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 63.92, + "end": 63.94, + "confidence": 0.979 + }, + { + "text": "got", + "start": 63.94, + "end": 63.96, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 63.96, + "end": 63.98, + "confidence": 0.979 + }, + { + "text": "got", + "start": 63.98, + "end": 64.0, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 64.0, + "end": 64.02, + "confidence": 0.979 + }, + { + "text": "got", + "start": 64.02, + "end": 64.04, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 64.04, + "end": 64.06, + "confidence": 0.979 + }, + { + "text": "got", + "start": 64.06, + "end": 64.08, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 64.08, + "end": 64.1, + "confidence": 0.979 + }, + { + "text": "got", + "start": 64.1, + "end": 64.12, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 64.12, + "end": 64.4, + "confidence": 0.98 + }, + { + "text": "got", + "start": 64.42, + "end": 64.58, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 64.58, + "end": 64.64, + "confidence": 0.98 + }, + { + "text": "got", + "start": 64.74, + "end": 65.06, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 65.06, + "end": 65.22, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.22, + "end": 65.98, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 65.98, + "end": 66.26, + "confidence": 0.981 + }, + { + "text": "got", + "start": 66.28, + "end": 67.32, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 67.32, + "end": 67.44, + "confidence": 0.982 + }, + { + "text": "got", + "start": 67.68, + "end": 67.7, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 67.7, + 
"end": 67.88, + "confidence": 0.981 + }, + { + "text": "got", + "start": 67.92, + "end": 68.8, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 68.8, + "end": 69.12, + "confidence": 0.982 + }, + { + "text": "got", + "start": 69.26, + "end": 69.28, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 69.28, + "end": 69.42, + "confidence": 0.982 + }, + { + "text": "got", + "start": 69.82, + "end": 69.84, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 69.84, + "end": 70.16, + "confidence": 0.982 + }, + { + "text": "got", + "start": 70.16, + "end": 70.52, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 70.52, + "end": 71.38, + "confidence": 0.981 + }, + { + "text": "got", + "start": 71.38, + "end": 71.58, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 71.58, + "end": 71.94, + "confidence": 0.982 + }, + { + "text": "got", + "start": 71.94, + "end": 72.12, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 72.12, + "end": 72.76, + "confidence": 0.983 + }, + { + "text": "got", + "start": 72.94, + "end": 73.56, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 73.56, + "end": 74.08, + "confidence": 0.983 + }, + { + "text": "got", + "start": 74.68, + "end": 75.08, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 75.08, + "end": 75.18, + "confidence": 0.984 + }, + { + "text": "got", + "start": 75.2, + "end": 76.42, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 76.42, + "end": 76.6, + "confidence": 0.983 + }, + { + "text": "got", + "start": 76.6, + "end": 76.8, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 76.8, + "end": 77.06, + "confidence": 0.985 + }, + { + "text": "got", + "start": 77.06, + "end": 77.44, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 77.44, + "end": 77.78, + "confidence": 0.984 + }, + { + "text": "got", + "start": 78.22, + "end": 78.24, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 78.24, + "end": 78.44, + 
"confidence": 0.985 + }, + { + "text": "got", + "start": 79.86, + "end": 79.9, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 79.9, + "end": 82.46, + "confidence": 0.985 + }, + { + "text": "got", + "start": 82.46, + "end": 84.44, + "confidence": 0.992 + }, + { + "text": "them", + "start": 84.44, + "end": 84.46, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.2, + "end": 91.86, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 
11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.04266870609847954, + "compression_ratio": 29.52, + "no_speech_prob": 0.6326744556427002, + "confidence": 0.854, + "words": [ + { + "text": "got", + "start": 85.2, + "end": 85.36, + "confidence": 0.0 + }, + { + "text": "them,", + "start": 85.36, + "end": 85.42, + "confidence": 0.024 + }, + { + "text": "got", + "start": 85.68, + "end": 86.68, + "confidence": 0.791 + }, + { + "text": "them,", + "start": 86.68, + "end": 86.82, + "confidence": 0.583 + }, + { + "text": "got", + "start": 87.4, + "end": 87.6, + "confidence": 0.828 + }, + { + "text": "them,", + "start": 87.6, + "end": 88.02, + "confidence": 0.486 + }, + { + "text": "got", + "start": 88.02, + "end": 88.88, + "confidence": 0.881 + }, + { + "text": "them,", + "start": 88.88, + "end": 88.9, + "confidence": 0.425 + }, + { + "text": "got", + "start": 88.92, + "end": 88.94, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 88.94, + "end": 88.96, + "confidence": 0.397 + }, + { + "text": "got", + "start": 88.96, + "end": 88.98, + "confidence": 0.897 + }, + { + "text": "them,", + "start": 88.98, + "end": 89.0, + "confidence": 0.4 + }, + { + "text": "got", + "start": 89.0, + "end": 89.02, + "confidence": 0.869 + }, + { + "text": "them,", + "start": 89.02, + "end": 89.06, + "confidence": 0.47 + }, 
+ { + "text": "got", + "start": 89.18, + "end": 89.2, + "confidence": 0.89 + }, + { + "text": "them,", + "start": 89.2, + "end": 89.22, + "confidence": 0.57 + }, + { + "text": "got", + "start": 89.22, + "end": 89.24, + "confidence": 0.91 + }, + { + "text": "them,", + "start": 89.24, + "end": 89.26, + "confidence": 0.632 + }, + { + "text": "got", + "start": 89.26, + "end": 89.28, + "confidence": 0.598 + }, + { + "text": "them,", + "start": 89.28, + "end": 89.3, + "confidence": 0.743 + }, + { + "text": "got", + "start": 89.3, + "end": 89.32, + "confidence": 0.819 + }, + { + "text": "them,", + "start": 89.32, + "end": 89.34, + "confidence": 0.84 + }, + { + "text": "got", + "start": 89.34, + "end": 89.36, + "confidence": 0.899 + }, + { + "text": "them,", + "start": 89.36, + "end": 89.38, + "confidence": 0.888 + }, + { + "text": "got", + "start": 89.38, + "end": 89.4, + "confidence": 0.918 + }, + { + "text": "them,", + "start": 89.4, + "end": 89.42, + "confidence": 0.914 + }, + { + "text": "got", + "start": 89.42, + "end": 89.44, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 89.44, + "end": 89.46, + "confidence": 0.932 + }, + { + "text": "got", + "start": 89.46, + "end": 89.48, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 89.48, + "end": 89.5, + "confidence": 0.944 + }, + { + "text": "got", + "start": 89.5, + "end": 89.52, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 89.52, + "end": 89.54, + "confidence": 0.945 + }, + { + "text": "got", + "start": 89.54, + "end": 89.56, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 89.56, + "end": 89.58, + "confidence": 0.944 + }, + { + "text": "got", + "start": 89.58, + "end": 89.6, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 89.6, + "end": 89.62, + "confidence": 0.946 + }, + { + "text": "got", + "start": 89.62, + "end": 89.64, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 89.64, + "end": 89.66, + "confidence": 0.952 + }, + { + "text": "got", + 
"start": 89.66, + "end": 89.68, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 89.68, + "end": 89.7, + "confidence": 0.954 + }, + { + "text": "got", + "start": 89.7, + "end": 89.72, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 89.72, + "end": 89.74, + "confidence": 0.96 + }, + { + "text": "got", + "start": 89.74, + "end": 89.76, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 89.76, + "end": 89.78, + "confidence": 0.963 + }, + { + "text": "got", + "start": 89.78, + "end": 89.8, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 89.8, + "end": 89.82, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.82, + "end": 89.84, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 89.84, + "end": 89.86, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.86, + "end": 89.88, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 89.88, + "end": 89.9, + "confidence": 0.971 + }, + { + "text": "got", + "start": 89.9, + "end": 89.92, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 89.92, + "end": 89.94, + "confidence": 0.975 + }, + { + "text": "got", + "start": 89.94, + "end": 89.96, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 89.96, + "end": 89.98, + "confidence": 0.975 + }, + { + "text": "got", + "start": 89.98, + "end": 90.0, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 90.0, + "end": 90.02, + "confidence": 0.977 + }, + { + "text": "got", + "start": 90.02, + "end": 90.04, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 90.04, + "end": 90.06, + "confidence": 0.977 + }, + { + "text": "got", + "start": 90.06, + "end": 90.08, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 90.08, + "end": 90.1, + "confidence": 0.979 + }, + { + "text": "got", + "start": 90.1, + "end": 90.12, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 90.12, + "end": 90.14, + "confidence": 0.979 + }, + { + "text": "got", + "start": 90.14, + 
"end": 90.16, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 90.16, + "end": 90.18, + "confidence": 0.981 + }, + { + "text": "got", + "start": 90.18, + "end": 90.2, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 90.2, + "end": 90.22, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.22, + "end": 90.24, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 90.24, + "end": 90.26, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.26, + "end": 90.28, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 90.28, + "end": 90.3, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.3, + "end": 90.32, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 90.32, + "end": 90.34, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.34, + "end": 90.36, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 90.36, + "end": 90.38, + "confidence": 0.984 + }, + { + "text": "got", + "start": 90.38, + "end": 90.4, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 90.4, + "end": 90.42, + "confidence": 0.984 + }, + { + "text": "got", + "start": 90.42, + "end": 90.44, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 90.44, + "end": 90.46, + "confidence": 0.985 + }, + { + "text": "got", + "start": 90.46, + "end": 90.48, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 90.48, + "end": 90.5, + "confidence": 0.985 + }, + { + "text": "got", + "start": 90.5, + "end": 90.52, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 90.52, + "end": 90.54, + "confidence": 0.986 + }, + { + "text": "got", + "start": 90.54, + "end": 90.56, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 90.56, + "end": 90.58, + "confidence": 0.986 + }, + { + "text": "got", + "start": 90.58, + "end": 90.6, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 90.6, + "end": 90.62, + "confidence": 0.987 + }, + { + "text": "got", + "start": 90.62, + "end": 90.64, + 
"confidence": 0.974 + }, + { + "text": "them,", + "start": 90.64, + "end": 90.66, + "confidence": 0.987 + }, + { + "text": "got", + "start": 90.66, + "end": 90.68, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 90.68, + "end": 90.7, + "confidence": 0.987 + }, + { + "text": "got", + "start": 90.7, + "end": 90.72, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 90.72, + "end": 90.74, + "confidence": 0.988 + }, + { + "text": "got", + "start": 90.74, + "end": 90.76, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 90.76, + "end": 90.78, + "confidence": 0.989 + }, + { + "text": "got", + "start": 90.78, + "end": 90.8, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 90.8, + "end": 90.82, + "confidence": 0.989 + }, + { + "text": "got", + "start": 90.82, + "end": 90.84, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 90.84, + "end": 90.86, + "confidence": 0.989 + }, + { + "text": "got", + "start": 90.86, + "end": 90.88, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 90.88, + "end": 90.9, + "confidence": 0.99 + }, + { + "text": "got", + "start": 90.9, + "end": 90.92, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 90.92, + "end": 90.94, + "confidence": 0.99 + }, + { + "text": "got", + "start": 90.94, + "end": 90.96, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 90.96, + "end": 90.98, + "confidence": 0.991 + }, + { + "text": "got", + "start": 90.98, + "end": 91.0, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 91.0, + "end": 91.02, + "confidence": 0.991 + }, + { + "text": "got", + "start": 91.02, + "end": 91.04, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 91.04, + "end": 91.06, + "confidence": 0.991 + }, + { + "text": "got", + "start": 91.06, + "end": 91.08, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 91.08, + "end": 91.1, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.1, + "end": 91.12, + "confidence": 0.99 + 
}, + { + "text": "them,", + "start": 91.12, + "end": 91.14, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.14, + "end": 91.16, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 91.16, + "end": 91.18, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.18, + "end": 91.2, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 91.2, + "end": 91.22, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.22, + "end": 91.24, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 91.24, + "end": 91.26, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.26, + "end": 91.28, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 91.28, + "end": 91.3, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.3, + "end": 91.32, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 91.32, + "end": 91.34, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.34, + "end": 91.36, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 91.36, + "end": 91.38, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.38, + "end": 91.4, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.4, + "end": 91.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.42, + "end": 91.44, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.44, + "end": 91.46, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.46, + "end": 91.48, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.48, + "end": 91.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.5, + "end": 91.52, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.52, + "end": 91.54, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.54, + "end": 91.56, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.56, + "end": 91.58, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.58, + "end": 91.6, + "confidence": 0.995 + }, + { + "text": 
"them,", + "start": 91.6, + "end": 91.62, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.62, + "end": 91.64, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.64, + "end": 91.66, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.66, + "end": 91.68, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.68, + "end": 91.7, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.7, + "end": 91.72, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.72, + "end": 91.74, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.74, + "end": 91.76, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.76, + "end": 91.78, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.78, + "end": 91.8, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.8, + "end": 91.82, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.82, + "end": 91.84, + "confidence": 0.995 + }, + { + "text": "them", + "start": 91.84, + "end": 91.86, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/punctuations_no/bonjour.wav.csv b/tests/expected/punctuations_no/bonjour.wav.csv new file mode 100644 index 0000000000000000000000000000000000000000..5cbf6b2289864210ec6373166aa3e624eba1bde6 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.csv @@ -0,0 +1 @@ +Bonjour !,0.14,0.94 diff --git a/tests/expected/punctuations_no/bonjour.wav.srt b/tests/expected/punctuations_no/bonjour.wav.srt new file mode 100644 index 0000000000000000000000000000000000000000..d8d205dc3daf944fcf8cbd38edc5f45287fb5510 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.srt @@ -0,0 +1,4 @@ +1 +00:00:00,140 --> 00:00:00,940 +Bonjour ! 
+ diff --git a/tests/expected/punctuations_no/bonjour.wav.tsv b/tests/expected/punctuations_no/bonjour.wav.tsv new file mode 100644 index 0000000000000000000000000000000000000000..4244f49ea0e650559f762b334550dd60e0a2f2a6 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.tsv @@ -0,0 +1,2 @@ +start end text +140 940 Bonjour ! diff --git a/tests/expected/punctuations_no/bonjour.wav.txt b/tests/expected/punctuations_no/bonjour.wav.txt new file mode 100644 index 0000000000000000000000000000000000000000..6625d5f9893711f1c711cc5a3695219d1f4d9cc4 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.txt @@ -0,0 +1 @@ +Bonjour ! diff --git a/tests/expected/punctuations_no/bonjour.wav.vtt b/tests/expected/punctuations_no/bonjour.wav.vtt new file mode 100644 index 0000000000000000000000000000000000000000..ecc03120fcb44b8ed669288169bdae00ce0c79ec --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.vtt @@ -0,0 +1,7 @@ +WEBVTT + +WEBVTT + +00:00.140 --> 00:00.940 +Bonjour ! 
+ diff --git a/tests/expected/punctuations_no/bonjour.wav.words.csv b/tests/expected/punctuations_no/bonjour.wav.words.csv new file mode 100644 index 0000000000000000000000000000000000000000..2ebbe298c64c70895fa7d45404203bc3a58d99d0 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.csv @@ -0,0 +1 @@ +Bonjour,0.14,0.94 diff --git a/tests/expected/punctuations_no/bonjour.wav.words.json b/tests/expected/punctuations_no/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..be46e53f9a139741c8adee254f302e95fada75b9 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.json @@ -0,0 +1,32 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.94, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50402 + ], + "temperature": 0.0, + "avg_logprob": -0.7047327041625977, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.08847080171108246, + "confidence": 0.964, + "words": [ + { + "text": "Bonjour", + "start": 0.14, + "end": 0.94, + "confidence": 0.964 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/punctuations_no/bonjour.wav.words.srt b/tests/expected/punctuations_no/bonjour.wav.words.srt new file mode 100644 index 0000000000000000000000000000000000000000..cd14abb1f5b082f428f7e68574088414fa661db8 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.srt @@ -0,0 +1,4 @@ +1 +00:00:00,140 --> 00:00:00,940 +Bonjour + diff --git a/tests/expected/punctuations_no/bonjour.wav.words.tsv b/tests/expected/punctuations_no/bonjour.wav.words.tsv new file mode 100644 index 0000000000000000000000000000000000000000..2ddd00966ddffa056b18e3903294c7ef5795ffe1 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.tsv @@ -0,0 +1,2 @@ +start end text +140 940 Bonjour diff --git a/tests/expected/punctuations_no/bonjour.wav.words.vtt 
b/tests/expected/punctuations_no/bonjour.wav.words.vtt new file mode 100644 index 0000000000000000000000000000000000000000..a3ce34860a05ec1125085a02e60a5eac85458c72 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.vtt @@ -0,0 +1,7 @@ +WEBVTT + +WEBVTT + +00:00.140 --> 00:00.940 +Bonjour + diff --git a/tests/expected/punctuations_no/punctuations.mp3.csv b/tests/expected/punctuations_no/punctuations.mp3.csv new file mode 100644 index 0000000000000000000000000000000000000000..99139dd1ffcb8551acde7072c34682a561976ead --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.csv @@ -0,0 +1 @@ +"Dis-moi, est-ce que l'avion vole ?",0.38,2.76 diff --git a/tests/expected/punctuations_no/punctuations.mp3.srt b/tests/expected/punctuations_no/punctuations.mp3.srt new file mode 100644 index 0000000000000000000000000000000000000000..877851ff9664283f0877d16b24551fa2d7a88226 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.srt @@ -0,0 +1,4 @@ +1 +00:00:00,380 --> 00:00:02,760 +Dis-moi, est-ce que l'avion vole ? + diff --git a/tests/expected/punctuations_no/punctuations.mp3.tsv b/tests/expected/punctuations_no/punctuations.mp3.tsv new file mode 100644 index 0000000000000000000000000000000000000000..58cf737627008302c822b962a66b0b7e46a3a1de --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.tsv @@ -0,0 +1,2 @@ +start end text +380 2760 Dis-moi, est-ce que l'avion vole ? diff --git a/tests/expected/punctuations_no/punctuations.mp3.txt b/tests/expected/punctuations_no/punctuations.mp3.txt new file mode 100644 index 0000000000000000000000000000000000000000..6490b685ff0adf8d95873de48095732fd91c30fd --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.txt @@ -0,0 +1 @@ +Dis-moi, est-ce que l'avion vole ? 
diff --git a/tests/expected/punctuations_no/punctuations.mp3.vtt b/tests/expected/punctuations_no/punctuations.mp3.vtt new file mode 100644 index 0000000000000000000000000000000000000000..8397a28e233bce3dbd3ddbf7a4226bf3c210e870 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.vtt @@ -0,0 +1,7 @@ +WEBVTT + +WEBVTT + +00:00.380 --> 00:02.760 +Dis-moi, est-ce que l'avion vole ? + diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.csv b/tests/expected/punctuations_no/punctuations.mp3.words.csv new file mode 100644 index 0000000000000000000000000000000000000000..69362f890ad8d996e811cebb54bfabcdd64daf63 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.csv @@ -0,0 +1,5 @@ +Dis-moi,0.38,1.1 +est-ce,1.28,1.5 +que,1.5,1.66 +l'avion,1.66,2.04 +vole,2.04,2.76 diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.json b/tests/expected/punctuations_no/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..3dc1f87e063dbd458cc97bf2e6ed781d223ac7da --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.json @@ -0,0 +1,68 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 2.76, + "text": " Dis-moi, est-ce que l'avion vole ?", + "tokens": [ + 50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 6, + 706, + 313, + 49877, + 2506, + 50496 + ], + "temperature": 0.0, + "avg_logprob": -0.26349667941822724, + "compression_ratio": 0.8095238095238095, + "no_speech_prob": 0.03940592333674431, + "confidence": 0.928, + "words": [ + { + "text": "Dis-moi", + "start": 0.38, + "end": 1.1, + "confidence": 0.807 + }, + { + "text": "est-ce", + "start": 1.28, + "end": 1.5, + "confidence": 0.968 + }, + { + "text": "que", + "start": 1.5, + "end": 1.66, + "confidence": 0.978 + }, + { + "text": "l'avion", + "start": 1.66, + "end": 2.04, + "confidence": 0.993 + }, + { + "text": 
"vole", + "start": 2.04, + "end": 2.76, + "confidence": 0.898 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.srt b/tests/expected/punctuations_no/punctuations.mp3.words.srt new file mode 100644 index 0000000000000000000000000000000000000000..9a0041e682bad02b37134d8a0d764cd07b64f608 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.srt @@ -0,0 +1,20 @@ +1 +00:00:00,380 --> 00:00:01,100 +Dis-moi + +2 +00:00:01,280 --> 00:00:01,500 +est-ce + +3 +00:00:01,500 --> 00:00:01,660 +que + +4 +00:00:01,660 --> 00:00:02,040 +l'avion + +5 +00:00:02,040 --> 00:00:02,760 +vole + diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.tsv b/tests/expected/punctuations_no/punctuations.mp3.words.tsv new file mode 100644 index 0000000000000000000000000000000000000000..a9613c4fe6697b2aa5ea56eecd09fb9c18d32210 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.tsv @@ -0,0 +1,6 @@ +start end text +380 1100 Dis-moi +1280 1500 est-ce +1500 1660 que +1660 2040 l'avion +2040 2760 vole diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.vtt b/tests/expected/punctuations_no/punctuations.mp3.words.vtt new file mode 100644 index 0000000000000000000000000000000000000000..fc6f8a3b6558650512d59fcdc3ce41735b03176d --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.vtt @@ -0,0 +1,19 @@ +WEBVTT + +WEBVTT + +00:00.380 --> 00:01.100 +Dis-moi + +00:01.280 --> 00:01.500 +est-ce + +00:01.500 --> 00:01.660 +que + +00:01.660 --> 00:02.040 +l'avion + +00:02.040 --> 00:02.760 +vole + diff --git a/tests/expected/punctuations_yes/bonjour.wav.csv b/tests/expected/punctuations_yes/bonjour.wav.csv new file mode 100644 index 0000000000000000000000000000000000000000..5cbf6b2289864210ec6373166aa3e624eba1bde6 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.csv @@ -0,0 +1 @@ +Bonjour !,0.14,0.94 diff --git 
a/tests/expected/punctuations_yes/bonjour.wav.srt b/tests/expected/punctuations_yes/bonjour.wav.srt new file mode 100644 index 0000000000000000000000000000000000000000..d8d205dc3daf944fcf8cbd38edc5f45287fb5510 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.srt @@ -0,0 +1,4 @@ +1 +00:00:00,140 --> 00:00:00,940 +Bonjour ! + diff --git a/tests/expected/punctuations_yes/bonjour.wav.tsv b/tests/expected/punctuations_yes/bonjour.wav.tsv new file mode 100644 index 0000000000000000000000000000000000000000..4244f49ea0e650559f762b334550dd60e0a2f2a6 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.tsv @@ -0,0 +1,2 @@ +start end text +140 940 Bonjour ! diff --git a/tests/expected/punctuations_yes/bonjour.wav.txt b/tests/expected/punctuations_yes/bonjour.wav.txt new file mode 100644 index 0000000000000000000000000000000000000000..6625d5f9893711f1c711cc5a3695219d1f4d9cc4 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.txt @@ -0,0 +1 @@ +Bonjour ! diff --git a/tests/expected/punctuations_yes/bonjour.wav.vtt b/tests/expected/punctuations_yes/bonjour.wav.vtt new file mode 100644 index 0000000000000000000000000000000000000000..ecc03120fcb44b8ed669288169bdae00ce0c79ec --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.vtt @@ -0,0 +1,7 @@ +WEBVTT + +WEBVTT + +00:00.140 --> 00:00.940 +Bonjour ! 
+ diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.csv b/tests/expected/punctuations_yes/bonjour.wav.words.csv new file mode 100644 index 0000000000000000000000000000000000000000..5cbf6b2289864210ec6373166aa3e624eba1bde6 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.csv @@ -0,0 +1 @@ +Bonjour !,0.14,0.94 diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.json b/tests/expected/punctuations_yes/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..0ceff460cfed744a26ce7dd494b00fb39619a892 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.json @@ -0,0 +1,32 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.94, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50402 + ], + "temperature": 0.0, + "avg_logprob": -0.7047327041625977, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.08847080171108246, + "confidence": 0.964, + "words": [ + { + "text": "Bonjour !", + "start": 0.14, + "end": 0.94, + "confidence": 0.964 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.srt b/tests/expected/punctuations_yes/bonjour.wav.words.srt new file mode 100644 index 0000000000000000000000000000000000000000..d8d205dc3daf944fcf8cbd38edc5f45287fb5510 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.srt @@ -0,0 +1,4 @@ +1 +00:00:00,140 --> 00:00:00,940 +Bonjour ! + diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.tsv b/tests/expected/punctuations_yes/bonjour.wav.words.tsv new file mode 100644 index 0000000000000000000000000000000000000000..4244f49ea0e650559f762b334550dd60e0a2f2a6 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.tsv @@ -0,0 +1,2 @@ +start end text +140 940 Bonjour ! 
diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.vtt b/tests/expected/punctuations_yes/bonjour.wav.words.vtt new file mode 100644 index 0000000000000000000000000000000000000000..ecc03120fcb44b8ed669288169bdae00ce0c79ec --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.vtt @@ -0,0 +1,7 @@ +WEBVTT + +WEBVTT + +00:00.140 --> 00:00.940 +Bonjour ! + diff --git a/tests/expected/punctuations_yes/punctuations.mp3.csv b/tests/expected/punctuations_yes/punctuations.mp3.csv new file mode 100644 index 0000000000000000000000000000000000000000..99139dd1ffcb8551acde7072c34682a561976ead --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.csv @@ -0,0 +1 @@ +"Dis-moi, est-ce que l'avion vole ?",0.38,2.76 diff --git a/tests/expected/punctuations_yes/punctuations.mp3.srt b/tests/expected/punctuations_yes/punctuations.mp3.srt new file mode 100644 index 0000000000000000000000000000000000000000..877851ff9664283f0877d16b24551fa2d7a88226 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.srt @@ -0,0 +1,4 @@ +1 +00:00:00,380 --> 00:00:02,760 +Dis-moi, est-ce que l'avion vole ? + diff --git a/tests/expected/punctuations_yes/punctuations.mp3.tsv b/tests/expected/punctuations_yes/punctuations.mp3.tsv new file mode 100644 index 0000000000000000000000000000000000000000..58cf737627008302c822b962a66b0b7e46a3a1de --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.tsv @@ -0,0 +1,2 @@ +start end text +380 2760 Dis-moi, est-ce que l'avion vole ? diff --git a/tests/expected/punctuations_yes/punctuations.mp3.txt b/tests/expected/punctuations_yes/punctuations.mp3.txt new file mode 100644 index 0000000000000000000000000000000000000000..6490b685ff0adf8d95873de48095732fd91c30fd --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.txt @@ -0,0 +1 @@ +Dis-moi, est-ce que l'avion vole ? 
diff --git a/tests/expected/punctuations_yes/punctuations.mp3.vtt b/tests/expected/punctuations_yes/punctuations.mp3.vtt new file mode 100644 index 0000000000000000000000000000000000000000..8397a28e233bce3dbd3ddbf7a4226bf3c210e870 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.vtt @@ -0,0 +1,7 @@ +WEBVTT + +WEBVTT + +00:00.380 --> 00:02.760 +Dis-moi, est-ce que l'avion vole ? + diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.csv b/tests/expected/punctuations_yes/punctuations.mp3.words.csv new file mode 100644 index 0000000000000000000000000000000000000000..c6a4613439d9ffda67122981bdfc9d5896416c73 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.csv @@ -0,0 +1,5 @@ +"Dis-moi,",0.38,1.1 +est-ce,1.28,1.5 +que,1.5,1.66 +l'avion,1.66,2.04 +vole ?,2.04,2.76 diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.json b/tests/expected/punctuations_yes/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..d5f1765f3c8fa2266342bd52ade9f7749c6a73b8 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.json @@ -0,0 +1,68 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 2.76, + "text": " Dis-moi, est-ce que l'avion vole ?", + "tokens": [ + 50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 6, + 706, + 313, + 49877, + 2506, + 50496 + ], + "temperature": 0.0, + "avg_logprob": -0.26349667941822724, + "compression_ratio": 0.8095238095238095, + "no_speech_prob": 0.03940592333674431, + "confidence": 0.928, + "words": [ + { + "text": "Dis-moi,", + "start": 0.38, + "end": 1.1, + "confidence": 0.807 + }, + { + "text": "est-ce", + "start": 1.28, + "end": 1.5, + "confidence": 0.968 + }, + { + "text": "que", + "start": 1.5, + "end": 1.66, + "confidence": 0.978 + }, + { + "text": "l'avion", + "start": 1.66, + "end": 2.04, + "confidence": 0.993 + }, + { + 
"text": "vole ?", + "start": 2.04, + "end": 2.76, + "confidence": 0.898 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.srt b/tests/expected/punctuations_yes/punctuations.mp3.words.srt new file mode 100644 index 0000000000000000000000000000000000000000..91c0fbf35dff3f5043392a4b6035510d073372d4 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.srt @@ -0,0 +1,20 @@ +1 +00:00:00,380 --> 00:00:01,100 +Dis-moi, + +2 +00:00:01,280 --> 00:00:01,500 +est-ce + +3 +00:00:01,500 --> 00:00:01,660 +que + +4 +00:00:01,660 --> 00:00:02,040 +l'avion + +5 +00:00:02,040 --> 00:00:02,760 +vole ? + diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.tsv b/tests/expected/punctuations_yes/punctuations.mp3.words.tsv new file mode 100644 index 0000000000000000000000000000000000000000..f267961f36fcab7036a58e7e979625d89df087e4 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.tsv @@ -0,0 +1,6 @@ +start end text +380 1100 Dis-moi, +1280 1500 est-ce +1500 1660 que +1660 2040 l'avion +2040 2760 vole ? diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.vtt b/tests/expected/punctuations_yes/punctuations.mp3.words.vtt new file mode 100644 index 0000000000000000000000000000000000000000..0a2cdcec1b994bb7512933594d72181013183838 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.vtt @@ -0,0 +1,19 @@ +WEBVTT + +WEBVTT + +00:00.380 --> 00:01.100 +Dis-moi, + +00:01.280 --> 00:01.500 +est-ce + +00:01.500 --> 00:01.660 +que + +00:01.660 --> 00:02.040 +l'avion + +00:02.040 --> 00:02.760 +vole ? 
+ diff --git a/tests/expected/small.en.cpu/arabic.mp3.words.json b/tests/expected/small.en.cpu/arabic.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..acb72a1674c019af75523e14dc77422914030a68 --- /dev/null +++ b/tests/expected/small.en.cpu/arabic.mp3.words.json @@ -0,0 +1,3346 @@ +{ + "text": " I am the one who is the one who is the one who is the one who is the one who is the one I am the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is 
the one who is the one who is the one who is the one who is the one who is the one who is", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.0, + "end": 7.72, + "text": " I am the one who is the one who is the one who is the one who is the one who is the one", + "tokens": [ + 314, + 716, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530 + ], + "temperature": 0.0, + "avg_logprob": -0.23478534274631077, + "compression_ratio": 24.294117647058822, + "no_speech_prob": 0.6507940292358398, + "confidence": 0.358, + "words": [ + { + "text": "I", + "start": 1.0, + "end": 3.16, + "confidence": 0.053 + }, + { + "text": "am", + "start": 3.16, + "end": 4.04, + "confidence": 0.152 + }, + { + "text": "the", + "start": 4.04, + "end": 4.08, + "confidence": 0.143 + }, + { + "text": "one", + "start": 4.08, + "end": 6.1, + "confidence": 0.085 + }, + { + "text": "who", + "start": 6.1, + "end": 6.28, + "confidence": 0.718 + }, + { + "text": "is", + "start": 6.28, + "end": 6.32, + "confidence": 0.12 + }, + { + "text": "the", + "start": 6.32, + "end": 6.36, + "confidence": 0.134 + }, + { + "text": "one", + "start": 6.36, + "end": 6.4, + "confidence": 0.204 + }, + { + "text": "who", + "start": 6.4, + "end": 6.44, + "confidence": 0.618 + }, + { + "text": "is", + "start": 6.44, + "end": 6.96, + "confidence": 0.319 + }, + { + "text": "the", + "start": 6.96, + "end": 7.0, + "confidence": 0.382 + }, + { + "text": "one", + "start": 7.0, + "end": 7.24, + "confidence": 0.443 + }, + { + "text": "who", + "start": 7.24, + "end": 7.28, + "confidence": 0.448 + }, + { + "text": "is", + "start": 7.28, + "end": 7.32, + "confidence": 0.518 + }, + { + "text": "the", + "start": 7.32, + "end": 7.36, + "confidence": 0.543 + }, + { + "text": "one", + "start": 7.36, + "end": 7.4, + "confidence": 0.723 + }, + { + "text": "who", + "start": 7.4, + "end": 7.44, + "confidence": 0.433 + }, + { + "text": "is", 
+ "start": 7.44, + "end": 7.48, + "confidence": 0.678 + }, + { + "text": "the", + "start": 7.48, + "end": 7.52, + "confidence": 0.636 + }, + { + "text": "one", + "start": 7.52, + "end": 7.56, + "confidence": 0.817 + }, + { + "text": "who", + "start": 7.56, + "end": 7.6, + "confidence": 0.505 + }, + { + "text": "is", + "start": 7.6, + "end": 7.64, + "confidence": 0.74 + }, + { + "text": "the", + "start": 7.64, + "end": 7.68, + "confidence": 0.697 + }, + { + "text": "one", + "start": 7.68, + "end": 7.72, + "confidence": 0.698 + } + ] + }, + { + "id": 1, + "seek": 700, + "start": 7.72, + "end": 37.02, + "text": " I am the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the", + "tokens": [ + 50363, + 314, + 716, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 
262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262 + ], + "temperature": 0.0, + "avg_logprob": -0.06210707770453559, + "compression_ratio": 24.5, + "no_speech_prob": 3.115955405519344e-05, + "confidence": 0.939, + "words": [ + { + "text": "I", + "start": 7.72, + "end": 9.42, + "confidence": 0.22 + }, + { + "text": "am", + "start": 9.42, + "end": 12.1, + "confidence": 0.81 + }, + { + "text": "the", + "start": 12.1, + "end": 12.48, + "confidence": 0.872 + }, + { + "text": "one", + "start": 12.48, + "end": 12.58, + "confidence": 0.927 + }, + { + "text": "who", + "start": 12.58, + "end": 13.4, + "confidence": 0.966 + }, + { + "text": "is", + "start": 13.4, + "end": 13.96, + "confidence": 0.934 + }, + { + "text": "the", + "start": 13.96, + "end": 14.56, + "confidence": 0.968 + }, + { + "text": "one", + "start": 14.56, + "end": 14.6, + "confidence": 0.989 + }, + { + "text": "who", + "start": 14.6, + "end": 14.64, + "confidence": 0.951 + }, + { + "text": "is", + "start": 14.64, + "end": 14.68, + 
"confidence": 0.965 + }, + { + "text": "the", + "start": 14.68, + "end": 15.04, + "confidence": 0.977 + }, + { + "text": "one", + "start": 15.04, + "end": 17.78, + "confidence": 0.993 + }, + { + "text": "who", + "start": 17.78, + "end": 17.82, + "confidence": 0.843 + }, + { + "text": "is", + "start": 17.82, + "end": 18.58, + "confidence": 0.977 + }, + { + "text": "the", + "start": 18.58, + "end": 18.62, + "confidence": 0.979 + }, + { + "text": "one", + "start": 18.62, + "end": 18.66, + "confidence": 0.991 + }, + { + "text": "who", + "start": 18.66, + "end": 19.46, + "confidence": 0.694 + }, + { + "text": "is", + "start": 19.46, + "end": 20.9, + "confidence": 0.979 + }, + { + "text": "the", + "start": 20.9, + "end": 20.94, + "confidence": 0.978 + }, + { + "text": "one", + "start": 20.94, + "end": 20.98, + "confidence": 0.99 + }, + { + "text": "who", + "start": 20.98, + "end": 21.02, + "confidence": 0.657 + }, + { + "text": "is", + "start": 21.02, + "end": 21.06, + "confidence": 0.977 + }, + { + "text": "the", + "start": 21.06, + "end": 21.52, + "confidence": 0.974 + }, + { + "text": "one", + "start": 21.52, + "end": 21.56, + "confidence": 0.983 + }, + { + "text": "who", + "start": 21.56, + "end": 21.6, + "confidence": 0.609 + }, + { + "text": "is", + "start": 21.6, + "end": 22.04, + "confidence": 0.978 + }, + { + "text": "the", + "start": 22.04, + "end": 22.5, + "confidence": 0.966 + }, + { + "text": "one", + "start": 22.5, + "end": 23.6, + "confidence": 0.986 + }, + { + "text": "who", + "start": 23.6, + "end": 24.76, + "confidence": 0.624 + }, + { + "text": "is", + "start": 24.76, + "end": 26.18, + "confidence": 0.978 + }, + { + "text": "the", + "start": 26.18, + "end": 26.8, + "confidence": 0.962 + }, + { + "text": "one", + "start": 26.8, + "end": 26.84, + "confidence": 0.984 + }, + { + "text": "who", + "start": 26.84, + "end": 26.88, + "confidence": 0.679 + }, + { + "text": "is", + "start": 26.88, + "end": 27.4, + "confidence": 0.978 + }, + { + "text": "the", + 
"start": 27.4, + "end": 28.14, + "confidence": 0.958 + }, + { + "text": "one", + "start": 28.14, + "end": 28.28, + "confidence": 0.98 + }, + { + "text": "who", + "start": 28.28, + "end": 28.32, + "confidence": 0.671 + }, + { + "text": "is", + "start": 28.32, + "end": 28.36, + "confidence": 0.979 + }, + { + "text": "the", + "start": 28.36, + "end": 28.4, + "confidence": 0.955 + }, + { + "text": "one", + "start": 28.4, + "end": 28.8, + "confidence": 0.975 + }, + { + "text": "who", + "start": 28.8, + "end": 29.14, + "confidence": 0.683 + }, + { + "text": "is", + "start": 29.14, + "end": 29.18, + "confidence": 0.978 + }, + { + "text": "the", + "start": 29.18, + "end": 29.22, + "confidence": 0.956 + }, + { + "text": "one", + "start": 29.22, + "end": 29.56, + "confidence": 0.972 + }, + { + "text": "who", + "start": 29.56, + "end": 29.74, + "confidence": 0.695 + }, + { + "text": "is", + "start": 29.74, + "end": 29.78, + "confidence": 0.978 + }, + { + "text": "the", + "start": 29.78, + "end": 29.82, + "confidence": 0.957 + }, + { + "text": "one", + "start": 29.82, + "end": 29.86, + "confidence": 0.971 + }, + { + "text": "who", + "start": 29.86, + "end": 29.9, + "confidence": 0.713 + }, + { + "text": "is", + "start": 29.9, + "end": 29.94, + "confidence": 0.98 + }, + { + "text": "the", + "start": 29.94, + "end": 29.98, + "confidence": 0.958 + }, + { + "text": "one", + "start": 29.98, + "end": 30.02, + "confidence": 0.971 + }, + { + "text": "who", + "start": 30.02, + "end": 30.06, + "confidence": 0.737 + }, + { + "text": "is", + "start": 30.06, + "end": 30.1, + "confidence": 0.982 + }, + { + "text": "the", + "start": 30.1, + "end": 30.14, + "confidence": 0.96 + }, + { + "text": "one", + "start": 30.14, + "end": 30.18, + "confidence": 0.971 + }, + { + "text": "who", + "start": 30.18, + "end": 30.22, + "confidence": 0.754 + }, + { + "text": "is", + "start": 30.22, + "end": 30.26, + "confidence": 0.984 + }, + { + "text": "the", + "start": 30.26, + "end": 30.3, + "confidence": 
0.962 + }, + { + "text": "one", + "start": 30.3, + "end": 30.34, + "confidence": 0.973 + }, + { + "text": "who", + "start": 30.34, + "end": 30.38, + "confidence": 0.759 + }, + { + "text": "is", + "start": 30.38, + "end": 30.42, + "confidence": 0.986 + }, + { + "text": "the", + "start": 30.42, + "end": 30.46, + "confidence": 0.964 + }, + { + "text": "one", + "start": 30.46, + "end": 30.5, + "confidence": 0.974 + }, + { + "text": "who", + "start": 30.5, + "end": 30.54, + "confidence": 0.765 + }, + { + "text": "is", + "start": 30.54, + "end": 30.58, + "confidence": 0.987 + }, + { + "text": "the", + "start": 30.58, + "end": 30.62, + "confidence": 0.966 + }, + { + "text": "one", + "start": 30.62, + "end": 30.66, + "confidence": 0.975 + }, + { + "text": "who", + "start": 30.66, + "end": 30.7, + "confidence": 0.774 + }, + { + "text": "is", + "start": 30.7, + "end": 30.74, + "confidence": 0.988 + }, + { + "text": "the", + "start": 30.74, + "end": 30.78, + "confidence": 0.969 + }, + { + "text": "one", + "start": 30.78, + "end": 30.82, + "confidence": 0.975 + }, + { + "text": "who", + "start": 30.82, + "end": 30.86, + "confidence": 0.801 + }, + { + "text": "is", + "start": 30.86, + "end": 30.9, + "confidence": 0.989 + }, + { + "text": "the", + "start": 30.9, + "end": 30.94, + "confidence": 0.97 + }, + { + "text": "one", + "start": 30.94, + "end": 30.98, + "confidence": 0.976 + }, + { + "text": "who", + "start": 30.98, + "end": 31.02, + "confidence": 0.818 + }, + { + "text": "is", + "start": 31.02, + "end": 31.06, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.06, + "end": 31.1, + "confidence": 0.972 + }, + { + "text": "one", + "start": 31.1, + "end": 31.14, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.14, + "end": 31.18, + "confidence": 0.838 + }, + { + "text": "is", + "start": 31.18, + "end": 31.22, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.22, + "end": 31.26, + "confidence": 0.973 + }, + { + "text": "one", + "start": 
31.26, + "end": 31.3, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.3, + "end": 31.34, + "confidence": 0.862 + }, + { + "text": "is", + "start": 31.34, + "end": 31.38, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.38, + "end": 31.42, + "confidence": 0.974 + }, + { + "text": "one", + "start": 31.42, + "end": 31.46, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.46, + "end": 31.5, + "confidence": 0.882 + }, + { + "text": "is", + "start": 31.5, + "end": 31.54, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.54, + "end": 31.58, + "confidence": 0.974 + }, + { + "text": "one", + "start": 31.58, + "end": 31.62, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.62, + "end": 31.66, + "confidence": 0.895 + }, + { + "text": "is", + "start": 31.66, + "end": 31.7, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.7, + "end": 31.74, + "confidence": 0.975 + }, + { + "text": "one", + "start": 31.74, + "end": 31.78, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.78, + "end": 31.82, + "confidence": 0.905 + }, + { + "text": "is", + "start": 31.82, + "end": 31.86, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.86, + "end": 31.9, + "confidence": 0.975 + }, + { + "text": "one", + "start": 31.9, + "end": 31.94, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.94, + "end": 31.98, + "confidence": 0.92 + }, + { + "text": "is", + "start": 31.98, + "end": 32.02, + "confidence": 0.99 + }, + { + "text": "the", + "start": 32.02, + "end": 32.06, + "confidence": 0.976 + }, + { + "text": "one", + "start": 32.06, + "end": 32.1, + "confidence": 0.978 + }, + { + "text": "who", + "start": 32.1, + "end": 32.14, + "confidence": 0.924 + }, + { + "text": "is", + "start": 32.14, + "end": 32.18, + "confidence": 0.99 + }, + { + "text": "the", + "start": 32.18, + "end": 32.22, + "confidence": 0.977 + }, + { + "text": "one", + "start": 32.22, + "end": 32.26, + "confidence": 0.978 + }, 
+ { + "text": "who", + "start": 32.26, + "end": 32.3, + "confidence": 0.932 + }, + { + "text": "is", + "start": 32.3, + "end": 32.34, + "confidence": 0.989 + }, + { + "text": "the", + "start": 32.34, + "end": 32.38, + "confidence": 0.976 + }, + { + "text": "one", + "start": 32.38, + "end": 32.42, + "confidence": 0.977 + }, + { + "text": "who", + "start": 32.42, + "end": 32.46, + "confidence": 0.936 + }, + { + "text": "is", + "start": 32.46, + "end": 32.5, + "confidence": 0.99 + }, + { + "text": "the", + "start": 32.5, + "end": 32.54, + "confidence": 0.977 + }, + { + "text": "one", + "start": 32.54, + "end": 32.58, + "confidence": 0.978 + }, + { + "text": "who", + "start": 32.58, + "end": 32.62, + "confidence": 0.94 + }, + { + "text": "is", + "start": 32.62, + "end": 32.66, + "confidence": 0.99 + }, + { + "text": "the", + "start": 32.66, + "end": 32.7, + "confidence": 0.977 + }, + { + "text": "one", + "start": 32.7, + "end": 32.74, + "confidence": 0.978 + }, + { + "text": "who", + "start": 32.74, + "end": 32.78, + "confidence": 0.942 + }, + { + "text": "is", + "start": 32.78, + "end": 32.82, + "confidence": 0.989 + }, + { + "text": "the", + "start": 32.82, + "end": 32.86, + "confidence": 0.977 + }, + { + "text": "one", + "start": 32.86, + "end": 32.9, + "confidence": 0.978 + }, + { + "text": "who", + "start": 32.9, + "end": 32.94, + "confidence": 0.941 + }, + { + "text": "is", + "start": 32.94, + "end": 32.98, + "confidence": 0.989 + }, + { + "text": "the", + "start": 32.98, + "end": 33.02, + "confidence": 0.978 + }, + { + "text": "one", + "start": 33.02, + "end": 33.06, + "confidence": 0.978 + }, + { + "text": "who", + "start": 33.06, + "end": 33.1, + "confidence": 0.942 + }, + { + "text": "is", + "start": 33.1, + "end": 33.14, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.14, + "end": 33.18, + "confidence": 0.978 + }, + { + "text": "one", + "start": 33.18, + "end": 33.22, + "confidence": 0.978 + }, + { + "text": "who", + "start": 33.22, + "end": 
33.26, + "confidence": 0.942 + }, + { + "text": "is", + "start": 33.26, + "end": 33.3, + "confidence": 0.99 + }, + { + "text": "the", + "start": 33.3, + "end": 33.34, + "confidence": 0.978 + }, + { + "text": "one", + "start": 33.34, + "end": 33.38, + "confidence": 0.978 + }, + { + "text": "who", + "start": 33.38, + "end": 33.42, + "confidence": 0.941 + }, + { + "text": "is", + "start": 33.42, + "end": 33.46, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.46, + "end": 33.5, + "confidence": 0.979 + }, + { + "text": "one", + "start": 33.5, + "end": 33.54, + "confidence": 0.977 + }, + { + "text": "who", + "start": 33.54, + "end": 33.58, + "confidence": 0.941 + }, + { + "text": "is", + "start": 33.58, + "end": 33.62, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.62, + "end": 33.66, + "confidence": 0.978 + }, + { + "text": "one", + "start": 33.66, + "end": 33.7, + "confidence": 0.977 + }, + { + "text": "who", + "start": 33.7, + "end": 33.74, + "confidence": 0.939 + }, + { + "text": "is", + "start": 33.74, + "end": 33.78, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.78, + "end": 33.82, + "confidence": 0.979 + }, + { + "text": "one", + "start": 33.82, + "end": 33.86, + "confidence": 0.977 + }, + { + "text": "who", + "start": 33.86, + "end": 33.9, + "confidence": 0.939 + }, + { + "text": "is", + "start": 33.9, + "end": 33.94, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.94, + "end": 33.98, + "confidence": 0.979 + }, + { + "text": "one", + "start": 33.98, + "end": 34.02, + "confidence": 0.977 + }, + { + "text": "who", + "start": 34.02, + "end": 34.06, + "confidence": 0.938 + }, + { + "text": "is", + "start": 34.06, + "end": 34.1, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.1, + "end": 34.14, + "confidence": 0.978 + }, + { + "text": "one", + "start": 34.14, + "end": 34.18, + "confidence": 0.977 + }, + { + "text": "who", + "start": 34.18, + "end": 34.22, + "confidence": 0.936 + }, + { + "text": 
"is", + "start": 34.22, + "end": 34.26, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.26, + "end": 34.3, + "confidence": 0.978 + }, + { + "text": "one", + "start": 34.3, + "end": 34.34, + "confidence": 0.977 + }, + { + "text": "who", + "start": 34.34, + "end": 34.38, + "confidence": 0.936 + }, + { + "text": "is", + "start": 34.38, + "end": 34.42, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.42, + "end": 34.46, + "confidence": 0.979 + }, + { + "text": "one", + "start": 34.46, + "end": 34.5, + "confidence": 0.976 + }, + { + "text": "who", + "start": 34.5, + "end": 34.54, + "confidence": 0.936 + }, + { + "text": "is", + "start": 34.54, + "end": 34.58, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.58, + "end": 34.62, + "confidence": 0.978 + }, + { + "text": "one", + "start": 34.62, + "end": 34.66, + "confidence": 0.977 + }, + { + "text": "who", + "start": 34.66, + "end": 34.7, + "confidence": 0.936 + }, + { + "text": "is", + "start": 34.7, + "end": 34.74, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.74, + "end": 34.78, + "confidence": 0.978 + }, + { + "text": "one", + "start": 34.78, + "end": 34.82, + "confidence": 0.976 + }, + { + "text": "who", + "start": 34.82, + "end": 34.86, + "confidence": 0.935 + }, + { + "text": "is", + "start": 34.86, + "end": 34.9, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.9, + "end": 34.94, + "confidence": 0.979 + }, + { + "text": "one", + "start": 34.94, + "end": 34.98, + "confidence": 0.976 + }, + { + "text": "who", + "start": 34.98, + "end": 35.02, + "confidence": 0.936 + }, + { + "text": "is", + "start": 35.02, + "end": 35.06, + "confidence": 0.987 + }, + { + "text": "the", + "start": 35.06, + "end": 35.1, + "confidence": 0.978 + }, + { + "text": "one", + "start": 35.1, + "end": 35.14, + "confidence": 0.976 + }, + { + "text": "who", + "start": 35.14, + "end": 35.18, + "confidence": 0.936 + }, + { + "text": "is", + "start": 35.18, + "end": 35.22, + 
"confidence": 0.987 + }, + { + "text": "the", + "start": 35.22, + "end": 35.26, + "confidence": 0.978 + }, + { + "text": "one", + "start": 35.26, + "end": 35.3, + "confidence": 0.976 + }, + { + "text": "who", + "start": 35.3, + "end": 35.34, + "confidence": 0.935 + }, + { + "text": "is", + "start": 35.34, + "end": 35.38, + "confidence": 0.987 + }, + { + "text": "the", + "start": 35.38, + "end": 35.42, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.42, + "end": 35.46, + "confidence": 0.976 + }, + { + "text": "who", + "start": 35.46, + "end": 35.5, + "confidence": 0.934 + }, + { + "text": "is", + "start": 35.5, + "end": 35.54, + "confidence": 0.986 + }, + { + "text": "the", + "start": 35.54, + "end": 35.58, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.58, + "end": 35.62, + "confidence": 0.976 + }, + { + "text": "who", + "start": 35.62, + "end": 35.66, + "confidence": 0.933 + }, + { + "text": "is", + "start": 35.66, + "end": 35.7, + "confidence": 0.985 + }, + { + "text": "the", + "start": 35.7, + "end": 35.74, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.74, + "end": 35.78, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.78, + "end": 35.82, + "confidence": 0.939 + }, + { + "text": "is", + "start": 35.82, + "end": 35.86, + "confidence": 0.986 + }, + { + "text": "the", + "start": 35.86, + "end": 35.9, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.9, + "end": 35.94, + "confidence": 0.975 + }, + { + "text": "who", + "start": 35.94, + "end": 35.98, + "confidence": 0.932 + }, + { + "text": "is", + "start": 35.98, + "end": 36.02, + "confidence": 0.984 + }, + { + "text": "the", + "start": 36.02, + "end": 36.06, + "confidence": 0.976 + }, + { + "text": "one", + "start": 36.06, + "end": 36.1, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.1, + "end": 36.14, + "confidence": 0.931 + }, + { + "text": "is", + "start": 36.14, + "end": 36.18, + "confidence": 0.984 + }, + { + "text": "the", + 
"start": 36.18, + "end": 36.22, + "confidence": 0.976 + }, + { + "text": "one", + "start": 36.22, + "end": 36.26, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.26, + "end": 36.3, + "confidence": 0.929 + }, + { + "text": "is", + "start": 36.3, + "end": 36.34, + "confidence": 0.983 + }, + { + "text": "the", + "start": 36.34, + "end": 36.38, + "confidence": 0.975 + }, + { + "text": "one", + "start": 36.38, + "end": 36.42, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.42, + "end": 36.46, + "confidence": 0.929 + }, + { + "text": "is", + "start": 36.46, + "end": 36.5, + "confidence": 0.982 + }, + { + "text": "the", + "start": 36.5, + "end": 36.54, + "confidence": 0.974 + }, + { + "text": "one", + "start": 36.54, + "end": 36.58, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.58, + "end": 36.62, + "confidence": 0.928 + }, + { + "text": "is", + "start": 36.62, + "end": 36.66, + "confidence": 0.982 + }, + { + "text": "the", + "start": 36.66, + "end": 36.7, + "confidence": 0.974 + }, + { + "text": "one", + "start": 36.7, + "end": 36.74, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.74, + "end": 36.92, + "confidence": 0.928 + }, + { + "text": "is", + "start": 36.92, + "end": 36.98, + "confidence": 0.981 + }, + { + "text": "the", + "start": 36.98, + "end": 37.02, + "confidence": 0.973 + } + ] + }, + { + "id": 2, + "seek": 3700, + "start": 37.02, + "end": 67.0, + "text": " one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the 
one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is", + "tokens": [ + 50363, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318 + ], + "temperature": 0.0, + "avg_logprob": -0.017284017139010958, + "compression_ratio": 26.93548387096774, + "no_speech_prob": 
0.09498446434736252, + "confidence": 0.983, + "words": [ + { + "text": "one", + "start": 37.02, + "end": 37.4, + "confidence": 0.964 + }, + { + "text": "who", + "start": 37.4, + "end": 39.86, + "confidence": 0.961 + }, + { + "text": "is", + "start": 39.86, + "end": 39.9, + "confidence": 0.977 + }, + { + "text": "the", + "start": 39.9, + "end": 39.94, + "confidence": 0.975 + }, + { + "text": "one", + "start": 39.94, + "end": 39.98, + "confidence": 0.964 + }, + { + "text": "who", + "start": 39.98, + "end": 40.02, + "confidence": 0.982 + }, + { + "text": "is", + "start": 40.02, + "end": 40.06, + "confidence": 0.989 + }, + { + "text": "the", + "start": 40.06, + "end": 40.1, + "confidence": 0.978 + }, + { + "text": "one", + "start": 40.1, + "end": 40.14, + "confidence": 0.98 + }, + { + "text": "who", + "start": 40.14, + "end": 40.18, + "confidence": 0.989 + }, + { + "text": "is", + "start": 40.18, + "end": 40.22, + "confidence": 0.992 + }, + { + "text": "the", + "start": 40.22, + "end": 40.26, + "confidence": 0.97 + }, + { + "text": "one", + "start": 40.26, + "end": 40.3, + "confidence": 0.982 + }, + { + "text": "who", + "start": 40.3, + "end": 40.34, + "confidence": 0.989 + }, + { + "text": "is", + "start": 40.34, + "end": 40.38, + "confidence": 0.993 + }, + { + "text": "the", + "start": 40.38, + "end": 40.42, + "confidence": 0.971 + }, + { + "text": "one", + "start": 40.42, + "end": 40.46, + "confidence": 0.984 + }, + { + "text": "who", + "start": 40.46, + "end": 40.5, + "confidence": 0.99 + }, + { + "text": "is", + "start": 40.5, + "end": 40.54, + "confidence": 0.994 + }, + { + "text": "the", + "start": 40.54, + "end": 40.58, + "confidence": 0.969 + }, + { + "text": "one", + "start": 40.58, + "end": 40.62, + "confidence": 0.985 + }, + { + "text": "who", + "start": 40.62, + "end": 40.66, + "confidence": 0.989 + }, + { + "text": "is", + "start": 40.66, + "end": 40.7, + "confidence": 0.991 + }, + { + "text": "the", + "start": 40.7, + "end": 40.74, + "confidence": 0.625 
+ }, + { + "text": "one", + "start": 40.74, + "end": 40.78, + "confidence": 0.947 + }, + { + "text": "who", + "start": 40.78, + "end": 40.82, + "confidence": 0.985 + }, + { + "text": "is", + "start": 40.82, + "end": 40.86, + "confidence": 0.99 + }, + { + "text": "the", + "start": 40.86, + "end": 40.9, + "confidence": 0.91 + }, + { + "text": "one", + "start": 40.9, + "end": 40.94, + "confidence": 0.984 + }, + { + "text": "who", + "start": 40.94, + "end": 40.98, + "confidence": 0.986 + }, + { + "text": "is", + "start": 40.98, + "end": 41.02, + "confidence": 0.994 + }, + { + "text": "the", + "start": 41.02, + "end": 41.06, + "confidence": 0.933 + }, + { + "text": "one", + "start": 41.06, + "end": 41.1, + "confidence": 0.985 + }, + { + "text": "who", + "start": 41.1, + "end": 41.14, + "confidence": 0.988 + }, + { + "text": "is", + "start": 41.14, + "end": 41.18, + "confidence": 0.996 + }, + { + "text": "the", + "start": 41.18, + "end": 41.22, + "confidence": 0.948 + }, + { + "text": "one", + "start": 41.22, + "end": 41.26, + "confidence": 0.987 + }, + { + "text": "who", + "start": 41.26, + "end": 41.3, + "confidence": 0.989 + }, + { + "text": "is", + "start": 41.3, + "end": 41.34, + "confidence": 0.997 + }, + { + "text": "the", + "start": 41.34, + "end": 41.38, + "confidence": 0.961 + }, + { + "text": "one", + "start": 41.38, + "end": 41.42, + "confidence": 0.988 + }, + { + "text": "who", + "start": 41.42, + "end": 41.46, + "confidence": 0.99 + }, + { + "text": "is", + "start": 41.46, + "end": 41.5, + "confidence": 0.997 + }, + { + "text": "the", + "start": 41.5, + "end": 41.54, + "confidence": 0.969 + }, + { + "text": "one", + "start": 41.54, + "end": 41.58, + "confidence": 0.989 + }, + { + "text": "who", + "start": 41.58, + "end": 41.62, + "confidence": 0.99 + }, + { + "text": "is", + "start": 41.62, + "end": 41.66, + "confidence": 0.998 + }, + { + "text": "the", + "start": 41.66, + "end": 41.7, + "confidence": 0.976 + }, + { + "text": "one", + "start": 41.7, + 
"end": 41.74, + "confidence": 0.99 + }, + { + "text": "who", + "start": 41.74, + "end": 41.78, + "confidence": 0.99 + }, + { + "text": "is", + "start": 41.78, + "end": 41.82, + "confidence": 0.998 + }, + { + "text": "the", + "start": 41.82, + "end": 41.86, + "confidence": 0.981 + }, + { + "text": "one", + "start": 41.86, + "end": 41.9, + "confidence": 0.99 + }, + { + "text": "who", + "start": 41.9, + "end": 41.94, + "confidence": 0.99 + }, + { + "text": "is", + "start": 41.94, + "end": 41.98, + "confidence": 0.998 + }, + { + "text": "the", + "start": 41.98, + "end": 42.02, + "confidence": 0.984 + }, + { + "text": "one", + "start": 42.02, + "end": 42.06, + "confidence": 0.991 + }, + { + "text": "who", + "start": 42.06, + "end": 42.1, + "confidence": 0.99 + }, + { + "text": "is", + "start": 42.1, + "end": 42.14, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.14, + "end": 42.18, + "confidence": 0.987 + }, + { + "text": "one", + "start": 42.18, + "end": 42.22, + "confidence": 0.991 + }, + { + "text": "who", + "start": 42.22, + "end": 42.26, + "confidence": 0.99 + }, + { + "text": "is", + "start": 42.26, + "end": 42.3, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.3, + "end": 42.34, + "confidence": 0.988 + }, + { + "text": "one", + "start": 42.34, + "end": 42.38, + "confidence": 0.991 + }, + { + "text": "who", + "start": 42.38, + "end": 42.42, + "confidence": 0.989 + }, + { + "text": "is", + "start": 42.42, + "end": 42.46, + "confidence": 0.999 + }, + { + "text": "the", + "start": 42.46, + "end": 42.5, + "confidence": 0.989 + }, + { + "text": "one", + "start": 42.5, + "end": 42.54, + "confidence": 0.991 + }, + { + "text": "who", + "start": 42.54, + "end": 42.58, + "confidence": 0.989 + }, + { + "text": "is", + "start": 42.58, + "end": 42.62, + "confidence": 0.999 + }, + { + "text": "the", + "start": 42.62, + "end": 42.66, + "confidence": 0.99 + }, + { + "text": "one", + "start": 42.66, + "end": 42.7, + "confidence": 0.991 + }, + { + 
"text": "who", + "start": 42.7, + "end": 42.74, + "confidence": 0.989 + }, + { + "text": "is", + "start": 42.74, + "end": 42.78, + "confidence": 0.999 + }, + { + "text": "the", + "start": 42.78, + "end": 42.82, + "confidence": 0.99 + }, + { + "text": "one", + "start": 42.82, + "end": 42.86, + "confidence": 0.992 + }, + { + "text": "who", + "start": 42.86, + "end": 42.9, + "confidence": 0.988 + }, + { + "text": "is", + "start": 42.9, + "end": 42.94, + "confidence": 0.999 + }, + { + "text": "the", + "start": 42.94, + "end": 42.98, + "confidence": 0.991 + }, + { + "text": "one", + "start": 42.98, + "end": 43.02, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.02, + "end": 43.06, + "confidence": 0.988 + }, + { + "text": "is", + "start": 43.06, + "end": 43.1, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.1, + "end": 43.14, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.14, + "end": 43.18, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.18, + "end": 43.22, + "confidence": 0.988 + }, + { + "text": "is", + "start": 43.22, + "end": 43.26, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.26, + "end": 43.3, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.3, + "end": 43.34, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.34, + "end": 43.38, + "confidence": 0.988 + }, + { + "text": "is", + "start": 43.38, + "end": 43.42, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.42, + "end": 43.46, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.46, + "end": 43.5, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.5, + "end": 43.54, + "confidence": 0.987 + }, + { + "text": "is", + "start": 43.54, + "end": 43.58, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.58, + "end": 43.62, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.62, + "end": 43.66, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.66, + "end": 43.7, 
+ "confidence": 0.987 + }, + { + "text": "is", + "start": 43.7, + "end": 43.74, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.74, + "end": 43.78, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.78, + "end": 43.82, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.82, + "end": 43.86, + "confidence": 0.987 + }, + { + "text": "is", + "start": 43.86, + "end": 43.9, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.9, + "end": 43.94, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.94, + "end": 43.98, + "confidence": 0.99 + }, + { + "text": "who", + "start": 43.98, + "end": 44.02, + "confidence": 0.986 + }, + { + "text": "is", + "start": 44.02, + "end": 44.06, + "confidence": 0.999 + }, + { + "text": "the", + "start": 44.06, + "end": 44.1, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.1, + "end": 44.14, + "confidence": 0.99 + }, + { + "text": "who", + "start": 44.14, + "end": 44.18, + "confidence": 0.986 + }, + { + "text": "is", + "start": 44.18, + "end": 44.22, + "confidence": 0.999 + }, + { + "text": "the", + "start": 44.22, + "end": 44.26, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.26, + "end": 44.3, + "confidence": 0.99 + }, + { + "text": "who", + "start": 44.3, + "end": 44.34, + "confidence": 0.985 + }, + { + "text": "is", + "start": 44.34, + "end": 44.38, + "confidence": 0.999 + }, + { + "text": "the", + "start": 44.38, + "end": 44.42, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.42, + "end": 44.46, + "confidence": 0.99 + }, + { + "text": "who", + "start": 44.46, + "end": 44.5, + "confidence": 0.985 + }, + { + "text": "is", + "start": 44.5, + "end": 44.54, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.54, + "end": 44.58, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.58, + "end": 44.62, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.62, + "end": 44.66, + "confidence": 0.984 + }, + { + "text": "is", + 
"start": 44.66, + "end": 44.7, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.7, + "end": 44.74, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.74, + "end": 44.78, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.78, + "end": 44.82, + "confidence": 0.983 + }, + { + "text": "is", + "start": 44.82, + "end": 44.86, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.86, + "end": 44.9, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.9, + "end": 44.94, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.94, + "end": 44.98, + "confidence": 0.983 + }, + { + "text": "is", + "start": 44.98, + "end": 45.02, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.02, + "end": 45.06, + "confidence": 0.991 + }, + { + "text": "one", + "start": 45.06, + "end": 45.1, + "confidence": 0.988 + }, + { + "text": "who", + "start": 45.1, + "end": 45.14, + "confidence": 0.982 + }, + { + "text": "is", + "start": 45.14, + "end": 45.18, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.18, + "end": 45.22, + "confidence": 0.99 + }, + { + "text": "one", + "start": 45.22, + "end": 45.26, + "confidence": 0.988 + }, + { + "text": "who", + "start": 45.26, + "end": 45.3, + "confidence": 0.98 + }, + { + "text": "is", + "start": 45.3, + "end": 45.34, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.34, + "end": 45.38, + "confidence": 0.99 + }, + { + "text": "one", + "start": 45.38, + "end": 45.42, + "confidence": 0.987 + }, + { + "text": "who", + "start": 45.42, + "end": 45.46, + "confidence": 0.979 + }, + { + "text": "is", + "start": 45.46, + "end": 45.5, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.5, + "end": 45.54, + "confidence": 0.99 + }, + { + "text": "one", + "start": 45.54, + "end": 45.58, + "confidence": 0.987 + }, + { + "text": "who", + "start": 45.58, + "end": 45.62, + "confidence": 0.978 + }, + { + "text": "is", + "start": 45.62, + "end": 45.66, + "confidence": 
0.997 + }, + { + "text": "the", + "start": 45.66, + "end": 45.7, + "confidence": 0.99 + }, + { + "text": "one", + "start": 45.7, + "end": 45.74, + "confidence": 0.986 + }, + { + "text": "who", + "start": 45.74, + "end": 45.78, + "confidence": 0.976 + }, + { + "text": "is", + "start": 45.78, + "end": 45.82, + "confidence": 0.997 + }, + { + "text": "the", + "start": 45.82, + "end": 45.86, + "confidence": 0.989 + }, + { + "text": "one", + "start": 45.86, + "end": 45.9, + "confidence": 0.986 + }, + { + "text": "who", + "start": 45.9, + "end": 45.94, + "confidence": 0.976 + }, + { + "text": "is", + "start": 45.94, + "end": 45.98, + "confidence": 0.997 + }, + { + "text": "the", + "start": 45.98, + "end": 46.02, + "confidence": 0.989 + }, + { + "text": "one", + "start": 46.02, + "end": 46.06, + "confidence": 0.986 + }, + { + "text": "who", + "start": 46.06, + "end": 46.1, + "confidence": 0.974 + }, + { + "text": "is", + "start": 46.1, + "end": 46.14, + "confidence": 0.997 + }, + { + "text": "the", + "start": 46.14, + "end": 46.18, + "confidence": 0.989 + }, + { + "text": "one", + "start": 46.18, + "end": 46.22, + "confidence": 0.985 + }, + { + "text": "who", + "start": 46.22, + "end": 46.26, + "confidence": 0.972 + }, + { + "text": "is", + "start": 46.26, + "end": 46.3, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.3, + "end": 46.34, + "confidence": 0.988 + }, + { + "text": "one", + "start": 46.34, + "end": 46.38, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.38, + "end": 46.42, + "confidence": 0.971 + }, + { + "text": "is", + "start": 46.42, + "end": 46.46, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.46, + "end": 46.5, + "confidence": 0.988 + }, + { + "text": "one", + "start": 46.5, + "end": 46.54, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.54, + "end": 46.58, + "confidence": 0.97 + }, + { + "text": "is", + "start": 46.58, + "end": 46.62, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.62, 
+ "end": 46.66, + "confidence": 0.987 + }, + { + "text": "one", + "start": 46.66, + "end": 46.7, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.7, + "end": 46.74, + "confidence": 0.968 + }, + { + "text": "is", + "start": 46.74, + "end": 46.78, + "confidence": 0.995 + }, + { + "text": "the", + "start": 46.78, + "end": 46.82, + "confidence": 0.987 + }, + { + "text": "one", + "start": 46.82, + "end": 46.86, + "confidence": 0.983 + }, + { + "text": "who", + "start": 46.86, + "end": 46.9, + "confidence": 0.966 + }, + { + "text": "is", + "start": 46.9, + "end": 46.94, + "confidence": 0.995 + }, + { + "text": "the", + "start": 46.94, + "end": 46.98, + "confidence": 0.986 + }, + { + "text": "one", + "start": 46.98, + "end": 47.02, + "confidence": 0.982 + }, + { + "text": "who", + "start": 47.02, + "end": 47.06, + "confidence": 0.964 + }, + { + "text": "is", + "start": 47.06, + "end": 47.1, + "confidence": 0.995 + }, + { + "text": "the", + "start": 47.1, + "end": 47.14, + "confidence": 0.986 + }, + { + "text": "one", + "start": 47.14, + "end": 47.18, + "confidence": 0.982 + }, + { + "text": "who", + "start": 47.18, + "end": 47.22, + "confidence": 0.963 + }, + { + "text": "is", + "start": 47.22, + "end": 47.26, + "confidence": 0.994 + }, + { + "text": "the", + "start": 47.26, + "end": 47.3, + "confidence": 0.985 + }, + { + "text": "one", + "start": 47.3, + "end": 47.34, + "confidence": 0.981 + }, + { + "text": "who", + "start": 47.34, + "end": 47.38, + "confidence": 0.959 + }, + { + "text": "is", + "start": 47.38, + "end": 47.42, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.42, + "end": 47.46, + "confidence": 0.984 + }, + { + "text": "one", + "start": 47.46, + "end": 47.5, + "confidence": 0.98 + }, + { + "text": "who", + "start": 47.5, + "end": 47.54, + "confidence": 0.958 + }, + { + "text": "is", + "start": 47.54, + "end": 47.58, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.58, + "end": 47.62, + "confidence": 0.983 + }, + { + 
"text": "one", + "start": 47.62, + "end": 47.66, + "confidence": 0.979 + }, + { + "text": "who", + "start": 47.66, + "end": 47.7, + "confidence": 0.955 + }, + { + "text": "is", + "start": 47.7, + "end": 47.74, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.74, + "end": 47.78, + "confidence": 0.982 + }, + { + "text": "one", + "start": 47.78, + "end": 47.82, + "confidence": 0.979 + }, + { + "text": "who", + "start": 47.82, + "end": 47.86, + "confidence": 0.953 + }, + { + "text": "is", + "start": 47.86, + "end": 47.9, + "confidence": 0.992 + }, + { + "text": "the", + "start": 47.9, + "end": 47.94, + "confidence": 0.982 + }, + { + "text": "one", + "start": 47.94, + "end": 47.98, + "confidence": 0.978 + }, + { + "text": "who", + "start": 47.98, + "end": 48.02, + "confidence": 0.952 + }, + { + "text": "is", + "start": 48.02, + "end": 48.06, + "confidence": 0.992 + }, + { + "text": "the", + "start": 48.06, + "end": 48.1, + "confidence": 0.981 + }, + { + "text": "one", + "start": 48.1, + "end": 48.14, + "confidence": 0.976 + }, + { + "text": "who", + "start": 48.14, + "end": 48.18, + "confidence": 0.95 + }, + { + "text": "is", + "start": 48.18, + "end": 48.22, + "confidence": 0.991 + }, + { + "text": "the", + "start": 48.22, + "end": 48.26, + "confidence": 0.98 + }, + { + "text": "one", + "start": 48.26, + "end": 48.3, + "confidence": 0.976 + }, + { + "text": "who", + "start": 48.3, + "end": 48.34, + "confidence": 0.949 + }, + { + "text": "is", + "start": 48.34, + "end": 48.38, + "confidence": 0.991 + }, + { + "text": "the", + "start": 48.38, + "end": 48.42, + "confidence": 0.979 + }, + { + "text": "one", + "start": 48.42, + "end": 48.46, + "confidence": 0.976 + }, + { + "text": "who", + "start": 48.46, + "end": 48.5, + "confidence": 0.944 + }, + { + "text": "is", + "start": 48.5, + "end": 48.54, + "confidence": 0.99 + }, + { + "text": "the", + "start": 48.54, + "end": 48.58, + "confidence": 0.979 + }, + { + "text": "one", + "start": 48.58, + "end": 48.62, + 
"confidence": 0.975 + }, + { + "text": "who", + "start": 48.62, + "end": 48.66, + "confidence": 0.945 + }, + { + "text": "is", + "start": 48.66, + "end": 67.0, + "confidence": 0.99 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/small.en/arabic.mp3.words.json b/tests/expected/small.en/arabic.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..190e702c32876986132ac46515c9177cad2b5b22 --- /dev/null +++ b/tests/expected/small.en/arabic.mp3.words.json @@ -0,0 +1,3348 @@ +{ + "text": " I am the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the 
one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 2.92, + "end": 7.72, + "text": " I am the one who is the one who is the one who is the one who is the one who is the one", + "tokens": [ + 50363, + 314, + 716, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 50713 + ], + "temperature": 0.0, + "avg_logprob": -0.2332169426812066, + "compression_ratio": 24.294117647058822, + "no_speech_prob": 0.6509259939193726, + "confidence": 0.358, + "words": [ + { + "text": "I", + "start": 2.92, + "end": 3.16, + "confidence": 0.053 + }, + { + "text": "am", + "start": 3.16, + "end": 4.04, + "confidence": 0.152 + }, + { + "text": "the", + "start": 4.04, + "end": 5.44, + "confidence": 0.144 + }, + { + "text": "one", + "start": 5.44, + "end": 6.1, + "confidence": 0.086 + }, + { + "text": "who", + "start": 6.1, + "end": 6.18, + "confidence": 0.718 + }, + { + "text": "is", + "start": 6.18, + "end": 6.22, + "confidence": 0.119 + }, + { + "text": "the", + "start": 6.22, + "end": 6.24, + "confidence": 0.134 + }, + { + "text": "one", + "start": 6.24, + "end": 6.72, + "confidence": 0.203 + }, + { + "text": "who", + "start": 6.72, + "end": 6.74, + "confidence": 0.618 + }, + { + "text": "is", + "start": 6.74, + "end": 7.18, + "confidence": 0.319 + }, + { + "text": "the", + "start": 7.18, + "end": 7.2, + "confidence": 0.38 + }, + { + "text": "one", + "start": 7.2, + "end": 7.48, + "confidence": 0.443 + }, + { + "text": "who", + "start": 7.48, + "end": 7.5, + "confidence": 0.447 + }, + { + "text": "is", + "start": 7.5, + "end": 7.52, + "confidence": 0.518 + }, + { 
+ "text": "the", + "start": 7.52, + "end": 7.54, + "confidence": 0.542 + }, + { + "text": "one", + "start": 7.54, + "end": 7.56, + "confidence": 0.724 + }, + { + "text": "who", + "start": 7.56, + "end": 7.58, + "confidence": 0.433 + }, + { + "text": "is", + "start": 7.58, + "end": 7.6, + "confidence": 0.677 + }, + { + "text": "the", + "start": 7.6, + "end": 7.62, + "confidence": 0.638 + }, + { + "text": "one", + "start": 7.62, + "end": 7.64, + "confidence": 0.817 + }, + { + "text": "who", + "start": 7.64, + "end": 7.66, + "confidence": 0.504 + }, + { + "text": "is", + "start": 7.66, + "end": 7.68, + "confidence": 0.74 + }, + { + "text": "the", + "start": 7.68, + "end": 7.7, + "confidence": 0.696 + }, + { + "text": "one", + "start": 7.7, + "end": 7.72, + "confidence": 0.697 + } + ] + }, + { + "id": 1, + "seek": 700, + "start": 7.72, + "end": 37.18, + "text": " who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the", + "tokens": [ + 50363, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, 
+ 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262 + ], + "temperature": 0.0, + "avg_logprob": -0.07849001566569011, + "compression_ratio": 26.93548387096774, + "no_speech_prob": 0.6889318823814392, + "confidence": 0.925, + "words": [ + { + "text": "who", + "start": 7.72, + "end": 7.74, + "confidence": 0.214 + }, + { + "text": "is", + "start": 7.74, + "end": 7.76, + "confidence": 0.892 + }, + { + "text": "the", + "start": 7.76, + "end": 7.78, + "confidence": 0.918 + }, + { + "text": "one", + "start": 7.78, + "end": 9.96, + "confidence": 0.955 + }, + { + "text": "who", + "start": 9.96, + "end": 13.46, + "confidence": 0.905 + }, + { + "text": "is", + "start": 13.46, + "end": 14.0, + "confidence": 0.908 + }, + { + "text": "the", + "start": 14.0, + 
"end": 14.38, + "confidence": 0.942 + }, + { + "text": "one", + "start": 14.38, + "end": 14.4, + "confidence": 0.977 + }, + { + "text": "who", + "start": 14.4, + "end": 14.42, + "confidence": 0.759 + }, + { + "text": "is", + "start": 14.42, + "end": 14.44, + "confidence": 0.932 + }, + { + "text": "the", + "start": 14.44, + "end": 15.24, + "confidence": 0.957 + }, + { + "text": "one", + "start": 15.24, + "end": 18.0, + "confidence": 0.982 + }, + { + "text": "who", + "start": 18.0, + "end": 18.02, + "confidence": 0.582 + }, + { + "text": "is", + "start": 18.02, + "end": 18.04, + "confidence": 0.944 + }, + { + "text": "the", + "start": 18.04, + "end": 18.06, + "confidence": 0.953 + }, + { + "text": "one", + "start": 18.06, + "end": 18.08, + "confidence": 0.98 + }, + { + "text": "who", + "start": 18.08, + "end": 19.38, + "confidence": 0.578 + }, + { + "text": "is", + "start": 19.38, + "end": 19.9, + "confidence": 0.953 + }, + { + "text": "the", + "start": 19.9, + "end": 19.92, + "confidence": 0.95 + }, + { + "text": "one", + "start": 19.92, + "end": 19.94, + "confidence": 0.98 + }, + { + "text": "who", + "start": 19.94, + "end": 19.96, + "confidence": 0.592 + }, + { + "text": "is", + "start": 19.96, + "end": 20.88, + "confidence": 0.95 + }, + { + "text": "the", + "start": 20.88, + "end": 21.28, + "confidence": 0.872 + }, + { + "text": "one", + "start": 21.28, + "end": 21.52, + "confidence": 0.902 + }, + { + "text": "who", + "start": 21.52, + "end": 21.54, + "confidence": 0.5 + }, + { + "text": "is", + "start": 21.54, + "end": 22.04, + "confidence": 0.943 + }, + { + "text": "the", + "start": 22.04, + "end": 22.24, + "confidence": 0.898 + }, + { + "text": "one", + "start": 22.24, + "end": 25.3, + "confidence": 0.98 + }, + { + "text": "who", + "start": 25.3, + "end": 25.32, + "confidence": 0.542 + }, + { + "text": "is", + "start": 25.32, + "end": 26.18, + "confidence": 0.953 + }, + { + "text": "the", + "start": 26.18, + "end": 26.64, + "confidence": 0.904 + }, + { + 
"text": "one", + "start": 26.64, + "end": 26.92, + "confidence": 0.976 + }, + { + "text": "who", + "start": 26.92, + "end": 26.94, + "confidence": 0.587 + }, + { + "text": "is", + "start": 26.94, + "end": 27.42, + "confidence": 0.953 + }, + { + "text": "the", + "start": 27.42, + "end": 28.08, + "confidence": 0.903 + }, + { + "text": "one", + "start": 28.08, + "end": 28.1, + "confidence": 0.968 + }, + { + "text": "who", + "start": 28.1, + "end": 28.12, + "confidence": 0.604 + }, + { + "text": "is", + "start": 28.12, + "end": 28.14, + "confidence": 0.956 + }, + { + "text": "the", + "start": 28.14, + "end": 28.16, + "confidence": 0.91 + }, + { + "text": "one", + "start": 28.16, + "end": 28.46, + "confidence": 0.963 + }, + { + "text": "who", + "start": 28.46, + "end": 28.48, + "confidence": 0.623 + }, + { + "text": "is", + "start": 28.48, + "end": 28.5, + "confidence": 0.96 + }, + { + "text": "the", + "start": 28.5, + "end": 28.52, + "confidence": 0.92 + }, + { + "text": "one", + "start": 28.52, + "end": 30.08, + "confidence": 0.961 + }, + { + "text": "who", + "start": 30.08, + "end": 30.1, + "confidence": 0.65 + }, + { + "text": "is", + "start": 30.1, + "end": 30.12, + "confidence": 0.965 + }, + { + "text": "the", + "start": 30.12, + "end": 30.3, + "confidence": 0.93 + }, + { + "text": "one", + "start": 30.3, + "end": 30.32, + "confidence": 0.961 + }, + { + "text": "who", + "start": 30.32, + "end": 31.0, + "confidence": 0.672 + }, + { + "text": "is", + "start": 31.0, + "end": 31.52, + "confidence": 0.971 + }, + { + "text": "the", + "start": 31.52, + "end": 31.54, + "confidence": 0.94 + }, + { + "text": "one", + "start": 31.54, + "end": 32.26, + "confidence": 0.962 + }, + { + "text": "who", + "start": 32.26, + "end": 32.48, + "confidence": 0.704 + }, + { + "text": "is", + "start": 32.48, + "end": 33.26, + "confidence": 0.976 + }, + { + "text": "the", + "start": 33.26, + "end": 33.28, + "confidence": 0.946 + }, + { + "text": "one", + "start": 33.28, + "end": 33.56, + 
"confidence": 0.964 + }, + { + "text": "who", + "start": 33.56, + "end": 33.86, + "confidence": 0.723 + }, + { + "text": "is", + "start": 33.86, + "end": 33.88, + "confidence": 0.979 + }, + { + "text": "the", + "start": 33.88, + "end": 33.9, + "confidence": 0.951 + }, + { + "text": "one", + "start": 33.9, + "end": 33.92, + "confidence": 0.966 + }, + { + "text": "who", + "start": 33.92, + "end": 33.94, + "confidence": 0.747 + }, + { + "text": "is", + "start": 33.94, + "end": 33.96, + "confidence": 0.982 + }, + { + "text": "the", + "start": 33.96, + "end": 33.98, + "confidence": 0.956 + }, + { + "text": "one", + "start": 33.98, + "end": 34.0, + "confidence": 0.967 + }, + { + "text": "who", + "start": 34.0, + "end": 34.02, + "confidence": 0.757 + }, + { + "text": "is", + "start": 34.02, + "end": 34.04, + "confidence": 0.984 + }, + { + "text": "the", + "start": 34.04, + "end": 34.06, + "confidence": 0.961 + }, + { + "text": "one", + "start": 34.06, + "end": 34.08, + "confidence": 0.969 + }, + { + "text": "who", + "start": 34.08, + "end": 34.1, + "confidence": 0.776 + }, + { + "text": "is", + "start": 34.1, + "end": 34.12, + "confidence": 0.986 + }, + { + "text": "the", + "start": 34.12, + "end": 34.14, + "confidence": 0.965 + }, + { + "text": "one", + "start": 34.14, + "end": 34.16, + "confidence": 0.97 + }, + { + "text": "who", + "start": 34.16, + "end": 34.18, + "confidence": 0.8 + }, + { + "text": "is", + "start": 34.18, + "end": 34.2, + "confidence": 0.987 + }, + { + "text": "the", + "start": 34.2, + "end": 34.22, + "confidence": 0.968 + }, + { + "text": "one", + "start": 34.22, + "end": 34.24, + "confidence": 0.971 + }, + { + "text": "who", + "start": 34.24, + "end": 34.26, + "confidence": 0.82 + }, + { + "text": "is", + "start": 34.26, + "end": 34.28, + "confidence": 0.987 + }, + { + "text": "the", + "start": 34.28, + "end": 34.3, + "confidence": 0.969 + }, + { + "text": "one", + "start": 34.3, + "end": 34.32, + "confidence": 0.972 + }, + { + "text": "who", + 
"start": 34.32, + "end": 34.34, + "confidence": 0.836 + }, + { + "text": "is", + "start": 34.34, + "end": 34.36, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.36, + "end": 34.38, + "confidence": 0.971 + }, + { + "text": "one", + "start": 34.38, + "end": 34.4, + "confidence": 0.972 + }, + { + "text": "who", + "start": 34.4, + "end": 34.42, + "confidence": 0.872 + }, + { + "text": "is", + "start": 34.42, + "end": 34.44, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.44, + "end": 34.46, + "confidence": 0.972 + }, + { + "text": "one", + "start": 34.46, + "end": 34.48, + "confidence": 0.973 + }, + { + "text": "who", + "start": 34.48, + "end": 34.5, + "confidence": 0.889 + }, + { + "text": "is", + "start": 34.5, + "end": 34.52, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.52, + "end": 34.54, + "confidence": 0.973 + }, + { + "text": "one", + "start": 34.54, + "end": 34.56, + "confidence": 0.973 + }, + { + "text": "who", + "start": 34.56, + "end": 34.58, + "confidence": 0.899 + }, + { + "text": "is", + "start": 34.58, + "end": 34.6, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.6, + "end": 34.62, + "confidence": 0.973 + }, + { + "text": "one", + "start": 34.62, + "end": 34.64, + "confidence": 0.973 + }, + { + "text": "who", + "start": 34.64, + "end": 34.66, + "confidence": 0.912 + }, + { + "text": "is", + "start": 34.66, + "end": 34.68, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.68, + "end": 34.7, + "confidence": 0.973 + }, + { + "text": "one", + "start": 34.7, + "end": 34.72, + "confidence": 0.973 + }, + { + "text": "who", + "start": 34.72, + "end": 34.74, + "confidence": 0.921 + }, + { + "text": "is", + "start": 34.74, + "end": 34.76, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.76, + "end": 34.78, + "confidence": 0.974 + }, + { + "text": "one", + "start": 34.78, + "end": 34.8, + "confidence": 0.974 + }, + { + "text": "who", + "start": 34.8, + "end": 34.82, + "confidence": 
0.927 + }, + { + "text": "is", + "start": 34.82, + "end": 34.84, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.84, + "end": 34.86, + "confidence": 0.974 + }, + { + "text": "one", + "start": 34.86, + "end": 34.88, + "confidence": 0.974 + }, + { + "text": "who", + "start": 34.88, + "end": 34.9, + "confidence": 0.935 + }, + { + "text": "is", + "start": 34.9, + "end": 34.92, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.92, + "end": 34.94, + "confidence": 0.974 + }, + { + "text": "one", + "start": 34.94, + "end": 34.96, + "confidence": 0.974 + }, + { + "text": "who", + "start": 34.96, + "end": 34.98, + "confidence": 0.934 + }, + { + "text": "is", + "start": 34.98, + "end": 35.0, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.0, + "end": 35.02, + "confidence": 0.975 + }, + { + "text": "one", + "start": 35.02, + "end": 35.04, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.04, + "end": 35.06, + "confidence": 0.937 + }, + { + "text": "is", + "start": 35.06, + "end": 35.08, + "confidence": 0.989 + }, + { + "text": "the", + "start": 35.08, + "end": 35.1, + "confidence": 0.976 + }, + { + "text": "one", + "start": 35.1, + "end": 35.12, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.12, + "end": 35.14, + "confidence": 0.937 + }, + { + "text": "is", + "start": 35.14, + "end": 35.16, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.16, + "end": 35.18, + "confidence": 0.976 + }, + { + "text": "one", + "start": 35.18, + "end": 35.2, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.2, + "end": 35.22, + "confidence": 0.934 + }, + { + "text": "is", + "start": 35.22, + "end": 35.24, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.24, + "end": 35.26, + "confidence": 0.976 + }, + { + "text": "one", + "start": 35.26, + "end": 35.28, + "confidence": 0.975 + }, + { + "text": "who", + "start": 35.28, + "end": 35.3, + "confidence": 0.933 + }, + { + "text": "is", + "start": 
35.3, + "end": 35.32, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.32, + "end": 35.34, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.34, + "end": 35.36, + "confidence": 0.975 + }, + { + "text": "who", + "start": 35.36, + "end": 35.38, + "confidence": 0.93 + }, + { + "text": "is", + "start": 35.38, + "end": 35.4, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.4, + "end": 35.42, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.42, + "end": 35.44, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.44, + "end": 35.46, + "confidence": 0.928 + }, + { + "text": "is", + "start": 35.46, + "end": 35.48, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.48, + "end": 35.5, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.5, + "end": 35.52, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.52, + "end": 35.54, + "confidence": 0.926 + }, + { + "text": "is", + "start": 35.54, + "end": 35.56, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.56, + "end": 35.58, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.58, + "end": 35.6, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.6, + "end": 35.62, + "confidence": 0.928 + }, + { + "text": "is", + "start": 35.62, + "end": 35.64, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.64, + "end": 35.66, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.66, + "end": 35.68, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.68, + "end": 35.7, + "confidence": 0.926 + }, + { + "text": "is", + "start": 35.7, + "end": 35.72, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.72, + "end": 35.74, + "confidence": 0.978 + }, + { + "text": "one", + "start": 35.74, + "end": 35.76, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.76, + "end": 35.78, + "confidence": 0.928 + }, + { + "text": "is", + "start": 35.78, + "end": 35.8, + "confidence": 0.988 + 
}, + { + "text": "the", + "start": 35.8, + "end": 35.82, + "confidence": 0.978 + }, + { + "text": "one", + "start": 35.82, + "end": 35.84, + "confidence": 0.973 + }, + { + "text": "who", + "start": 35.84, + "end": 35.86, + "confidence": 0.926 + }, + { + "text": "is", + "start": 35.86, + "end": 35.88, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.88, + "end": 35.9, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.9, + "end": 35.92, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.92, + "end": 35.94, + "confidence": 0.928 + }, + { + "text": "is", + "start": 35.94, + "end": 35.96, + "confidence": 0.987 + }, + { + "text": "the", + "start": 35.96, + "end": 35.98, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.98, + "end": 36.0, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.0, + "end": 36.02, + "confidence": 0.925 + }, + { + "text": "is", + "start": 36.02, + "end": 36.04, + "confidence": 0.987 + }, + { + "text": "the", + "start": 36.04, + "end": 36.06, + "confidence": 0.977 + }, + { + "text": "one", + "start": 36.06, + "end": 36.08, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.08, + "end": 36.1, + "confidence": 0.925 + }, + { + "text": "is", + "start": 36.1, + "end": 36.12, + "confidence": 0.987 + }, + { + "text": "the", + "start": 36.12, + "end": 36.14, + "confidence": 0.977 + }, + { + "text": "one", + "start": 36.14, + "end": 36.16, + "confidence": 0.974 + }, + { + "text": "who", + "start": 36.16, + "end": 36.18, + "confidence": 0.927 + }, + { + "text": "is", + "start": 36.18, + "end": 36.2, + "confidence": 0.987 + }, + { + "text": "the", + "start": 36.2, + "end": 36.22, + "confidence": 0.977 + }, + { + "text": "one", + "start": 36.22, + "end": 36.24, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.24, + "end": 36.26, + "confidence": 0.923 + }, + { + "text": "is", + "start": 36.26, + "end": 36.28, + "confidence": 0.987 + }, + { + "text": "the", + "start": 36.28, + 
"end": 36.3, + "confidence": 0.977 + }, + { + "text": "one", + "start": 36.3, + "end": 36.32, + "confidence": 0.972 + }, + { + "text": "who", + "start": 36.32, + "end": 36.34, + "confidence": 0.924 + }, + { + "text": "is", + "start": 36.34, + "end": 36.36, + "confidence": 0.986 + }, + { + "text": "the", + "start": 36.36, + "end": 36.38, + "confidence": 0.976 + }, + { + "text": "one", + "start": 36.38, + "end": 36.4, + "confidence": 0.972 + }, + { + "text": "who", + "start": 36.4, + "end": 36.42, + "confidence": 0.922 + }, + { + "text": "is", + "start": 36.42, + "end": 36.44, + "confidence": 0.985 + }, + { + "text": "the", + "start": 36.44, + "end": 36.46, + "confidence": 0.976 + }, + { + "text": "one", + "start": 36.46, + "end": 36.48, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.48, + "end": 36.5, + "confidence": 0.924 + }, + { + "text": "is", + "start": 36.5, + "end": 36.52, + "confidence": 0.985 + }, + { + "text": "the", + "start": 36.52, + "end": 36.54, + "confidence": 0.975 + }, + { + "text": "one", + "start": 36.54, + "end": 36.56, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.56, + "end": 36.58, + "confidence": 0.92 + }, + { + "text": "is", + "start": 36.58, + "end": 36.6, + "confidence": 0.983 + }, + { + "text": "the", + "start": 36.6, + "end": 36.62, + "confidence": 0.975 + }, + { + "text": "one", + "start": 36.62, + "end": 36.64, + "confidence": 0.971 + }, + { + "text": "who", + "start": 36.64, + "end": 36.66, + "confidence": 0.921 + }, + { + "text": "is", + "start": 36.66, + "end": 36.68, + "confidence": 0.983 + }, + { + "text": "the", + "start": 36.68, + "end": 36.7, + "confidence": 0.974 + }, + { + "text": "one", + "start": 36.7, + "end": 36.72, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.72, + "end": 36.74, + "confidence": 0.918 + }, + { + "text": "is", + "start": 36.74, + "end": 36.76, + "confidence": 0.982 + }, + { + "text": "the", + "start": 36.76, + "end": 36.78, + "confidence": 0.974 + }, + { + 
"text": "one", + "start": 36.78, + "end": 36.8, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.8, + "end": 36.82, + "confidence": 0.919 + }, + { + "text": "is", + "start": 36.82, + "end": 36.84, + "confidence": 0.982 + }, + { + "text": "the", + "start": 36.84, + "end": 36.86, + "confidence": 0.973 + }, + { + "text": "one", + "start": 36.86, + "end": 36.88, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.88, + "end": 36.9, + "confidence": 0.917 + }, + { + "text": "is", + "start": 36.9, + "end": 36.92, + "confidence": 0.981 + }, + { + "text": "the", + "start": 36.92, + "end": 36.94, + "confidence": 0.973 + }, + { + "text": "one", + "start": 36.94, + "end": 36.96, + "confidence": 0.972 + }, + { + "text": "who", + "start": 36.96, + "end": 36.98, + "confidence": 0.919 + }, + { + "text": "is", + "start": 36.98, + "end": 37.0, + "confidence": 0.98 + }, + { + "text": "the", + "start": 37.0, + "end": 37.02, + "confidence": 0.972 + }, + { + "text": "one", + "start": 37.02, + "end": 37.04, + "confidence": 0.972 + }, + { + "text": "who", + "start": 37.04, + "end": 37.06, + "confidence": 0.918 + }, + { + "text": "is", + "start": 37.06, + "end": 37.08, + "confidence": 0.979 + }, + { + "text": "the", + "start": 37.08, + "end": 37.1, + "confidence": 0.972 + }, + { + "text": "one", + "start": 37.1, + "end": 37.12, + "confidence": 0.972 + }, + { + "text": "who", + "start": 37.12, + "end": 37.14, + "confidence": 0.917 + }, + { + "text": "is", + "start": 37.14, + "end": 37.16, + "confidence": 0.978 + }, + { + "text": "the", + "start": 37.16, + "end": 37.18, + "confidence": 0.971 + } + ] + }, + { + "id": 2, + "seek": 3700, + "start": 37.18, + "end": 66.98, + "text": " one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who 
is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is", + "tokens": [ + 50363, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 
530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318 + ], + "temperature": 0.0, + "avg_logprob": -0.024528507656521266, + "compression_ratio": 26.93548387096774, + "no_speech_prob": 0.09981618076562881, + "confidence": 0.976, + "words": [ + { + "text": "one", + "start": 37.18, + "end": 37.32, + "confidence": 0.959 + }, + { + "text": "who", + "start": 37.32, + "end": 37.9, + "confidence": 0.954 + }, + { + "text": "is", + "start": 37.9, + "end": 37.92, + "confidence": 0.968 + }, + { + "text": "the", + "start": 37.92, + "end": 38.1, + "confidence": 0.962 + }, + { + "text": "one", + "start": 38.1, + "end": 38.6, + "confidence": 0.965 + }, + { + "text": "who", + "start": 38.6, + "end": 39.36, + "confidence": 0.979 + }, + { + "text": "is", + "start": 39.36, + "end": 39.58, + "confidence": 0.986 + }, + { + "text": "the", + "start": 39.58, + "end": 39.6, + "confidence": 0.969 + }, + { + "text": "one", + "start": 39.6, + "end": 39.62, + "confidence": 0.981 + }, + { + "text": "who", + "start": 39.62, + "end": 39.82, + "confidence": 0.985 + }, + { + "text": "is", + "start": 39.82, + "end": 40.12, + "confidence": 0.99 + }, + { + "text": "the", + "start": 40.12, + "end": 40.32, + "confidence": 0.957 + }, + { + "text": "one", + "start": 40.32, + "end": 40.62, + "confidence": 0.983 + }, + { + "text": "who", + "start": 40.62, + "end": 40.64, + "confidence": 0.984 + }, + { + "text": "is", + "start": 40.64, + "end": 40.66, + "confidence": 0.99 + }, + { + "text": "the", + "start": 40.66, + "end": 40.68, + "confidence": 0.956 + }, + { + "text": "one", + "start": 40.68, + "end": 40.74, + "confidence": 0.984 + }, + { + "text": "who", + "start": 40.74, + "end": 40.76, + "confidence": 0.984 + }, + { + "text": "is", + "start": 40.76, + "end": 40.78, + "confidence": 0.991 + }, + { + "text": "the", + "start": 40.78, + "end": 40.8, + "confidence": 0.953 + }, + { + "text": "one", + "start": 40.8, + "end": 40.82, + 
"confidence": 0.985 + }, + { + "text": "who", + "start": 40.82, + "end": 40.84, + "confidence": 0.979 + }, + { + "text": "is", + "start": 40.84, + "end": 40.86, + "confidence": 0.987 + }, + { + "text": "the", + "start": 40.86, + "end": 40.88, + "confidence": 0.495 + }, + { + "text": "one", + "start": 40.88, + "end": 41.5, + "confidence": 0.951 + }, + { + "text": "who", + "start": 41.5, + "end": 41.52, + "confidence": 0.968 + }, + { + "text": "is", + "start": 41.52, + "end": 41.54, + "confidence": 0.983 + }, + { + "text": "the", + "start": 41.54, + "end": 41.56, + "confidence": 0.844 + }, + { + "text": "one", + "start": 41.56, + "end": 41.58, + "confidence": 0.982 + }, + { + "text": "who", + "start": 41.58, + "end": 41.6, + "confidence": 0.973 + }, + { + "text": "is", + "start": 41.6, + "end": 41.62, + "confidence": 0.991 + }, + { + "text": "the", + "start": 41.62, + "end": 41.64, + "confidence": 0.888 + }, + { + "text": "one", + "start": 41.64, + "end": 41.66, + "confidence": 0.984 + }, + { + "text": "who", + "start": 41.66, + "end": 41.68, + "confidence": 0.978 + }, + { + "text": "is", + "start": 41.68, + "end": 41.7, + "confidence": 0.994 + }, + { + "text": "the", + "start": 41.7, + "end": 41.72, + "confidence": 0.915 + }, + { + "text": "one", + "start": 41.72, + "end": 41.74, + "confidence": 0.985 + }, + { + "text": "who", + "start": 41.74, + "end": 41.76, + "confidence": 0.981 + }, + { + "text": "is", + "start": 41.76, + "end": 41.78, + "confidence": 0.995 + }, + { + "text": "the", + "start": 41.78, + "end": 41.8, + "confidence": 0.937 + }, + { + "text": "one", + "start": 41.8, + "end": 41.82, + "confidence": 0.986 + }, + { + "text": "who", + "start": 41.82, + "end": 41.84, + "confidence": 0.983 + }, + { + "text": "is", + "start": 41.84, + "end": 41.86, + "confidence": 0.996 + }, + { + "text": "the", + "start": 41.86, + "end": 41.88, + "confidence": 0.95 + }, + { + "text": "one", + "start": 41.88, + "end": 41.9, + "confidence": 0.987 + }, + { + "text": "who", + 
"start": 41.9, + "end": 41.92, + "confidence": 0.983 + }, + { + "text": "is", + "start": 41.92, + "end": 41.94, + "confidence": 0.996 + }, + { + "text": "the", + "start": 41.94, + "end": 41.96, + "confidence": 0.961 + }, + { + "text": "one", + "start": 41.96, + "end": 41.98, + "confidence": 0.987 + }, + { + "text": "who", + "start": 41.98, + "end": 42.0, + "confidence": 0.984 + }, + { + "text": "is", + "start": 42.0, + "end": 42.02, + "confidence": 0.997 + }, + { + "text": "the", + "start": 42.02, + "end": 42.04, + "confidence": 0.969 + }, + { + "text": "one", + "start": 42.04, + "end": 42.06, + "confidence": 0.987 + }, + { + "text": "who", + "start": 42.06, + "end": 42.08, + "confidence": 0.983 + }, + { + "text": "is", + "start": 42.08, + "end": 42.1, + "confidence": 0.997 + }, + { + "text": "the", + "start": 42.1, + "end": 42.12, + "confidence": 0.975 + }, + { + "text": "one", + "start": 42.12, + "end": 42.14, + "confidence": 0.988 + }, + { + "text": "who", + "start": 42.14, + "end": 42.16, + "confidence": 0.983 + }, + { + "text": "is", + "start": 42.16, + "end": 42.18, + "confidence": 0.997 + }, + { + "text": "the", + "start": 42.18, + "end": 42.2, + "confidence": 0.979 + }, + { + "text": "one", + "start": 42.2, + "end": 42.22, + "confidence": 0.989 + }, + { + "text": "who", + "start": 42.22, + "end": 42.24, + "confidence": 0.983 + }, + { + "text": "is", + "start": 42.24, + "end": 42.26, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.26, + "end": 42.28, + "confidence": 0.982 + }, + { + "text": "one", + "start": 42.28, + "end": 42.3, + "confidence": 0.989 + }, + { + "text": "who", + "start": 42.3, + "end": 42.32, + "confidence": 0.982 + }, + { + "text": "is", + "start": 42.32, + "end": 42.34, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.34, + "end": 42.4, + "confidence": 0.983 + }, + { + "text": "one", + "start": 42.4, + "end": 42.56, + "confidence": 0.989 + }, + { + "text": "who", + "start": 42.56, + "end": 42.58, + "confidence": 
0.982 + }, + { + "text": "is", + "start": 42.58, + "end": 42.62, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.62, + "end": 42.64, + "confidence": 0.985 + }, + { + "text": "one", + "start": 42.64, + "end": 43.62, + "confidence": 0.989 + }, + { + "text": "who", + "start": 43.62, + "end": 43.64, + "confidence": 0.981 + }, + { + "text": "is", + "start": 43.64, + "end": 43.66, + "confidence": 0.998 + }, + { + "text": "the", + "start": 43.66, + "end": 43.68, + "confidence": 0.986 + }, + { + "text": "one", + "start": 43.68, + "end": 43.7, + "confidence": 0.989 + }, + { + "text": "who", + "start": 43.7, + "end": 43.72, + "confidence": 0.981 + }, + { + "text": "is", + "start": 43.72, + "end": 43.74, + "confidence": 0.998 + }, + { + "text": "the", + "start": 43.74, + "end": 43.76, + "confidence": 0.987 + }, + { + "text": "one", + "start": 43.76, + "end": 44.02, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.02, + "end": 44.04, + "confidence": 0.981 + }, + { + "text": "is", + "start": 44.04, + "end": 44.06, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.06, + "end": 44.08, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.08, + "end": 44.1, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.1, + "end": 44.12, + "confidence": 0.981 + }, + { + "text": "is", + "start": 44.12, + "end": 44.14, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.14, + "end": 44.16, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.16, + "end": 44.18, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.18, + "end": 44.2, + "confidence": 0.981 + }, + { + "text": "is", + "start": 44.2, + "end": 44.22, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.22, + "end": 44.24, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.24, + "end": 44.26, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.26, + "end": 44.28, + "confidence": 0.98 + }, + { + "text": "is", + "start": 
44.28, + "end": 44.3, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.3, + "end": 44.32, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.32, + "end": 44.34, + "confidence": 0.988 + }, + { + "text": "who", + "start": 44.34, + "end": 44.36, + "confidence": 0.98 + }, + { + "text": "is", + "start": 44.36, + "end": 44.38, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.38, + "end": 44.4, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.4, + "end": 44.42, + "confidence": 0.988 + }, + { + "text": "who", + "start": 44.42, + "end": 44.44, + "confidence": 0.98 + }, + { + "text": "is", + "start": 44.44, + "end": 44.46, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.46, + "end": 44.48, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.48, + "end": 44.54, + "confidence": 0.988 + }, + { + "text": "who", + "start": 44.54, + "end": 44.56, + "confidence": 0.98 + }, + { + "text": "is", + "start": 44.56, + "end": 44.58, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.58, + "end": 44.6, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.6, + "end": 45.16, + "confidence": 0.987 + }, + { + "text": "who", + "start": 45.16, + "end": 45.18, + "confidence": 0.98 + }, + { + "text": "is", + "start": 45.18, + "end": 45.2, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.2, + "end": 45.22, + "confidence": 0.988 + }, + { + "text": "one", + "start": 45.22, + "end": 45.24, + "confidence": 0.987 + }, + { + "text": "who", + "start": 45.24, + "end": 45.26, + "confidence": 0.979 + }, + { + "text": "is", + "start": 45.26, + "end": 45.28, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.28, + "end": 45.3, + "confidence": 0.988 + }, + { + "text": "one", + "start": 45.3, + "end": 45.32, + "confidence": 0.986 + }, + { + "text": "who", + "start": 45.32, + "end": 45.34, + "confidence": 0.978 + }, + { + "text": "is", + "start": 45.34, + "end": 45.36, + "confidence": 0.998 + }, + 
{ + "text": "the", + "start": 45.36, + "end": 45.38, + "confidence": 0.988 + }, + { + "text": "one", + "start": 45.38, + "end": 45.4, + "confidence": 0.986 + }, + { + "text": "who", + "start": 45.4, + "end": 45.42, + "confidence": 0.977 + }, + { + "text": "is", + "start": 45.42, + "end": 45.44, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.44, + "end": 45.46, + "confidence": 0.988 + }, + { + "text": "one", + "start": 45.46, + "end": 45.94, + "confidence": 0.985 + }, + { + "text": "who", + "start": 45.94, + "end": 45.96, + "confidence": 0.975 + }, + { + "text": "is", + "start": 45.96, + "end": 45.98, + "confidence": 0.997 + }, + { + "text": "the", + "start": 45.98, + "end": 46.0, + "confidence": 0.988 + }, + { + "text": "one", + "start": 46.0, + "end": 46.02, + "confidence": 0.985 + }, + { + "text": "who", + "start": 46.02, + "end": 46.04, + "confidence": 0.976 + }, + { + "text": "is", + "start": 46.04, + "end": 46.06, + "confidence": 0.997 + }, + { + "text": "the", + "start": 46.06, + "end": 46.08, + "confidence": 0.988 + }, + { + "text": "one", + "start": 46.08, + "end": 46.1, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.1, + "end": 46.12, + "confidence": 0.975 + }, + { + "text": "is", + "start": 46.12, + "end": 46.14, + "confidence": 0.997 + }, + { + "text": "the", + "start": 46.14, + "end": 46.16, + "confidence": 0.987 + }, + { + "text": "one", + "start": 46.16, + "end": 46.18, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.18, + "end": 46.2, + "confidence": 0.971 + }, + { + "text": "is", + "start": 46.2, + "end": 46.22, + "confidence": 0.997 + }, + { + "text": "the", + "start": 46.22, + "end": 46.24, + "confidence": 0.986 + }, + { + "text": "one", + "start": 46.24, + "end": 46.26, + "confidence": 0.983 + }, + { + "text": "who", + "start": 46.26, + "end": 46.28, + "confidence": 0.971 + }, + { + "text": "is", + "start": 46.28, + "end": 46.3, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.3, + "end": 
46.32, + "confidence": 0.986 + }, + { + "text": "one", + "start": 46.32, + "end": 46.84, + "confidence": 0.982 + }, + { + "text": "who", + "start": 46.84, + "end": 46.86, + "confidence": 0.969 + }, + { + "text": "is", + "start": 46.86, + "end": 46.88, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.88, + "end": 46.9, + "confidence": 0.986 + }, + { + "text": "one", + "start": 46.9, + "end": 46.92, + "confidence": 0.982 + }, + { + "text": "who", + "start": 46.92, + "end": 46.94, + "confidence": 0.966 + }, + { + "text": "is", + "start": 46.94, + "end": 46.96, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.96, + "end": 46.98, + "confidence": 0.985 + }, + { + "text": "one", + "start": 46.98, + "end": 47.1, + "confidence": 0.982 + }, + { + "text": "who", + "start": 47.1, + "end": 47.12, + "confidence": 0.966 + }, + { + "text": "is", + "start": 47.12, + "end": 47.14, + "confidence": 0.995 + }, + { + "text": "the", + "start": 47.14, + "end": 47.16, + "confidence": 0.985 + }, + { + "text": "one", + "start": 47.16, + "end": 47.18, + "confidence": 0.982 + }, + { + "text": "who", + "start": 47.18, + "end": 47.2, + "confidence": 0.963 + }, + { + "text": "is", + "start": 47.2, + "end": 47.22, + "confidence": 0.995 + }, + { + "text": "the", + "start": 47.22, + "end": 47.24, + "confidence": 0.984 + }, + { + "text": "one", + "start": 47.24, + "end": 47.26, + "confidence": 0.98 + }, + { + "text": "who", + "start": 47.26, + "end": 47.28, + "confidence": 0.959 + }, + { + "text": "is", + "start": 47.28, + "end": 47.3, + "confidence": 0.994 + }, + { + "text": "the", + "start": 47.3, + "end": 47.32, + "confidence": 0.983 + }, + { + "text": "one", + "start": 47.32, + "end": 47.34, + "confidence": 0.98 + }, + { + "text": "who", + "start": 47.34, + "end": 47.36, + "confidence": 0.958 + }, + { + "text": "is", + "start": 47.36, + "end": 47.38, + "confidence": 0.994 + }, + { + "text": "the", + "start": 47.38, + "end": 47.4, + "confidence": 0.983 + }, + { + "text": 
"one", + "start": 47.4, + "end": 47.6, + "confidence": 0.979 + }, + { + "text": "who", + "start": 47.6, + "end": 47.62, + "confidence": 0.955 + }, + { + "text": "is", + "start": 47.62, + "end": 47.64, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.64, + "end": 47.66, + "confidence": 0.982 + }, + { + "text": "one", + "start": 47.66, + "end": 47.68, + "confidence": 0.98 + }, + { + "text": "who", + "start": 47.68, + "end": 47.7, + "confidence": 0.956 + }, + { + "text": "is", + "start": 47.7, + "end": 47.72, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.72, + "end": 47.74, + "confidence": 0.981 + }, + { + "text": "one", + "start": 47.74, + "end": 47.76, + "confidence": 0.978 + }, + { + "text": "who", + "start": 47.76, + "end": 47.78, + "confidence": 0.949 + }, + { + "text": "is", + "start": 47.78, + "end": 47.8, + "confidence": 0.992 + }, + { + "text": "the", + "start": 47.8, + "end": 47.82, + "confidence": 0.98 + }, + { + "text": "one", + "start": 47.82, + "end": 49.1, + "confidence": 0.978 + }, + { + "text": "who", + "start": 49.1, + "end": 49.12, + "confidence": 0.948 + }, + { + "text": "is", + "start": 49.12, + "end": 49.14, + "confidence": 0.991 + }, + { + "text": "the", + "start": 49.14, + "end": 49.16, + "confidence": 0.98 + }, + { + "text": "one", + "start": 49.16, + "end": 49.26, + "confidence": 0.978 + }, + { + "text": "who", + "start": 49.26, + "end": 49.28, + "confidence": 0.949 + }, + { + "text": "is", + "start": 49.28, + "end": 49.3, + "confidence": 0.991 + }, + { + "text": "the", + "start": 49.3, + "end": 49.32, + "confidence": 0.979 + }, + { + "text": "one", + "start": 49.32, + "end": 49.34, + "confidence": 0.977 + }, + { + "text": "who", + "start": 49.34, + "end": 49.36, + "confidence": 0.944 + }, + { + "text": "is", + "start": 49.36, + "end": 49.38, + "confidence": 0.99 + }, + { + "text": "the", + "start": 49.38, + "end": 49.4, + "confidence": 0.977 + }, + { + "text": "one", + "start": 49.4, + "end": 49.42, + 
"confidence": 0.976 + }, + { + "text": "who", + "start": 49.42, + "end": 49.44, + "confidence": 0.94 + }, + { + "text": "is", + "start": 49.44, + "end": 49.46, + "confidence": 0.989 + }, + { + "text": "the", + "start": 49.46, + "end": 49.48, + "confidence": 0.977 + }, + { + "text": "one", + "start": 49.48, + "end": 49.5, + "confidence": 0.975 + }, + { + "text": "who", + "start": 49.5, + "end": 49.52, + "confidence": 0.94 + }, + { + "text": "is", + "start": 49.52, + "end": 49.54, + "confidence": 0.989 + }, + { + "text": "the", + "start": 49.54, + "end": 49.56, + "confidence": 0.975 + }, + { + "text": "one", + "start": 49.56, + "end": 49.58, + "confidence": 0.975 + }, + { + "text": "who", + "start": 49.58, + "end": 49.6, + "confidence": 0.935 + }, + { + "text": "is", + "start": 49.6, + "end": 49.62, + "confidence": 0.987 + }, + { + "text": "the", + "start": 49.62, + "end": 49.64, + "confidence": 0.974 + }, + { + "text": "one", + "start": 49.64, + "end": 49.66, + "confidence": 0.975 + }, + { + "text": "who", + "start": 49.66, + "end": 49.68, + "confidence": 0.936 + }, + { + "text": "is", + "start": 49.68, + "end": 49.7, + "confidence": 0.988 + }, + { + "text": "the", + "start": 49.7, + "end": 49.72, + "confidence": 0.974 + }, + { + "text": "one", + "start": 49.72, + "end": 49.74, + "confidence": 0.971 + }, + { + "text": "who", + "start": 49.74, + "end": 49.76, + "confidence": 0.932 + }, + { + "text": "is", + "start": 49.76, + "end": 49.78, + "confidence": 0.986 + }, + { + "text": "the", + "start": 49.78, + "end": 49.8, + "confidence": 0.971 + }, + { + "text": "one", + "start": 49.8, + "end": 49.82, + "confidence": 0.971 + }, + { + "text": "who", + "start": 49.82, + "end": 49.84, + "confidence": 0.928 + }, + { + "text": "is", + "start": 49.84, + "end": 49.86, + "confidence": 0.986 + }, + { + "text": "the", + "start": 49.86, + "end": 49.88, + "confidence": 0.971 + }, + { + "text": "one", + "start": 49.88, + "end": 49.9, + "confidence": 0.971 + }, + { + "text": "who", + 
"start": 49.9, + "end": 49.92, + "confidence": 0.923 + }, + { + "text": "is", + "start": 49.92, + "end": 49.94, + "confidence": 0.985 + }, + { + "text": "the", + "start": 49.94, + "end": 49.96, + "confidence": 0.97 + }, + { + "text": "one", + "start": 49.96, + "end": 66.64, + "confidence": 0.969 + }, + { + "text": "who", + "start": 66.64, + "end": 66.66, + "confidence": 0.925 + }, + { + "text": "is", + "start": 66.66, + "end": 66.98, + "confidence": 0.985 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/split_subtitles/punctuations.mp3_20.srt b/tests/expected/split_subtitles/punctuations.mp3_20.srt new file mode 100644 index 0000000000000000000000000000000000000000..607f0125cf1df1d150f8432a5ce5ad72669143df --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_20.srt @@ -0,0 +1,12 @@ +1 +00:00:00,400 --> 00:00:01,360 +Dis-moi, + +2 +00:00:01,360 --> 00:00:02,020 +est-ce que l'avion + +3 +00:00:02,020 --> 00:00:02,340 +vole? + diff --git a/tests/expected/split_subtitles/punctuations.mp3_20.vtt b/tests/expected/split_subtitles/punctuations.mp3_20.vtt new file mode 100644 index 0000000000000000000000000000000000000000..d06e85098725e851017bd86f94fa529b5c66d030 --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_20.vtt @@ -0,0 +1,11 @@ +WEBVTT + +00:00.400 --> 00:01.360 +Dis-moi, + +00:01.360 --> 00:02.020 +est-ce que l'avion + +00:02.020 --> 00:02.340 +vole? + diff --git a/tests/expected/split_subtitles/punctuations.mp3_50.srt b/tests/expected/split_subtitles/punctuations.mp3_50.srt new file mode 100644 index 0000000000000000000000000000000000000000..ffc9a2b42b2c510691ec27b303df2c88825c0aa4 --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_50.srt @@ -0,0 +1,4 @@ +1 +00:00:00,400 --> 00:00:02,340 +Dis-moi, est-ce que l'avion vole? 
+ diff --git a/tests/expected/split_subtitles/punctuations.mp3_50.vtt b/tests/expected/split_subtitles/punctuations.mp3_50.vtt new file mode 100644 index 0000000000000000000000000000000000000000..83c7bd9a0253ca6a8d117803fac3a2bdc3b9e525 --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_50.vtt @@ -0,0 +1,5 @@ +WEBVTT + +00:00.400 --> 00:02.340 +Dis-moi, est-ce que l'avion vole? + diff --git a/tests/expected/split_subtitles/punctuations.mp3_6.srt b/tests/expected/split_subtitles/punctuations.mp3_6.srt new file mode 100644 index 0000000000000000000000000000000000000000..0caea3d7e82a647c78c154e2d1879292e5442c9d --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_6.srt @@ -0,0 +1,20 @@ +1 +00:00:00,400 --> 00:00:01,360 +Dis-moi, + +2 +00:00:01,360 --> 00:00:01,520 +est-ce + +3 +00:00:01,520 --> 00:00:01,640 +que + +4 +00:00:01,640 --> 00:00:02,020 +l'avion + +5 +00:00:02,020 --> 00:00:02,340 +vole? + diff --git a/tests/expected/split_subtitles/punctuations.mp3_6.vtt b/tests/expected/split_subtitles/punctuations.mp3_6.vtt new file mode 100644 index 0000000000000000000000000000000000000000..f255e42075a222812432fc82fe6622c86de71015 --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_6.vtt @@ -0,0 +1,17 @@ +WEBVTT + +00:00.400 --> 00:01.360 +Dis-moi, + +00:01.360 --> 00:01.520 +est-ce + +00:01.520 --> 00:01.640 +que + +00:01.640 --> 00:02.020 +l'avion + +00:02.020 --> 00:02.340 +vole? 
+ diff --git a/tests/expected/split_subtitles/smartphone.mp3_20.srt b/tests/expected/split_subtitles/smartphone.mp3_20.srt new file mode 100644 index 0000000000000000000000000000000000000000..22c039440f6cb0beda8f9ccf043d766a717ac78a --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_20.srt @@ -0,0 +1,812 @@ +1 +00:00:00,380 --> 00:00:01,080 +C'est évident ce que + +2 +00:00:01,080 --> 00:00:01,780 +dit Nicolas, + +3 +00:00:01,780 --> 00:00:02,380 +mais je ne me + +4 +00:00:02,380 --> 00:00:02,840 +l'étais jamais + +5 +00:00:02,840 --> 00:00:03,620 +formulé comme ça. + +6 +00:00:04,080 --> 00:00:05,000 +Ce qui fait la force + +7 +00:00:05,000 --> 00:00:05,880 +du smartphone, + +8 +00:00:05,880 --> 00:00:06,520 +c'est pas seulement + +9 +00:00:06,520 --> 00:00:07,560 +l'accumulation des + +10 +00:00:07,560 --> 00:00:07,920 +fonctions, + +11 +00:00:08,320 --> 00:00:09,100 +mais la manière dont + +12 +00:00:09,100 --> 00:00:10,320 +elles interagissent + +13 +00:00:10,320 --> 00:00:10,880 +entre elles. + +14 +00:00:10,960 --> 00:00:11,400 +Ce qui dit + +15 +00:00:11,400 --> 00:00:11,780 +d'ailleurs sur la + +16 +00:00:11,780 --> 00:00:12,120 +photo, + +17 +00:00:12,120 --> 00:00:13,000 +c'est hyper convaincant. + +18 +00:00:13,340 --> 00:00:14,340 +Alors évidemment, + +19 +00:00:14,340 --> 00:00:15,160 +il faudrait ajouter + +20 +00:00:15,160 --> 00:00:16,020 +les interfaces. + +21 +00:00:16,220 --> 00:00:17,260 +L'écran tactile a + +22 +00:00:17,260 --> 00:00:18,620 +été beaucoup très + +23 +00:00:18,620 --> 00:00:19,360 +souvent mentionné. + +24 +00:00:19,840 --> 00:00:20,520 +Mais bon, + +25 +00:00:20,520 --> 00:00:20,960 +il faut dire qu'il + +26 +00:00:20,960 --> 00:00:22,100 +profite aussi de 20 + +27 +00:00:22,100 --> 00:00:22,920 +ans pendant lesquels + +28 +00:00:22,920 --> 00:00:23,720 +les ordinateurs nous + +29 +00:00:23,720 --> 00:00:24,500 +ont appris à cliquer + +30 +00:00:24,500 --> 00:00:25,260 +sur des icônes. 
+ +31 +00:00:25,420 --> 00:00:26,660 +Sauf que le + +32 +00:00:26,660 --> 00:00:27,620 +smartphone ajoute le + +33 +00:00:27,620 --> 00:00:28,180 +toucher, + +34 +00:00:28,180 --> 00:00:28,680 +ce qui rend le + +35 +00:00:28,680 --> 00:00:30,220 +contact plus direct, + +36 +00:00:30,220 --> 00:00:30,640 +plus sensible. + +37 +00:00:31,040 --> 00:00:31,700 +Et puis évidemment, + +38 +00:00:31,700 --> 00:00:32,120 +il faudrait parler + +39 +00:00:32,120 --> 00:00:32,480 +aussi des + +40 +00:00:32,480 --> 00:00:33,180 +applications qui + +41 +00:00:33,180 --> 00:00:33,960 +permettent de + +42 +00:00:33,960 --> 00:00:34,800 +contourner le côté + +43 +00:00:34,800 --> 00:00:35,780 +touffu de la + +44 +00:00:35,780 --> 00:00:36,780 +navigation web pour + +45 +00:00:36,780 --> 00:00:37,680 +aller directement au + +46 +00:00:37,680 --> 00:00:37,820 +but. + +47 +00:00:37,820 --> 00:00:39,420 +Bref, tout ça, + +48 +00:00:39,420 --> 00:00:40,160 +ce sont les + +49 +00:00:40,160 --> 00:00:40,960 +conditions qui + +50 +00:00:40,960 --> 00:00:42,060 +permettent de créer + +51 +00:00:42,060 --> 00:00:42,800 +cet objet dont + +52 +00:00:42,800 --> 00:00:43,700 +Nicolas dit qu'il + +53 +00:00:43,700 --> 00:00:43,880 +est + +54 +00:00:43,880 --> 00:00:44,980 +vraisemblablement + +55 +00:00:44,980 --> 00:00:45,700 +inédit dans + +56 +00:00:45,700 --> 00:00:46,180 +l'histoire de + +57 +00:00:46,180 --> 00:00:46,580 +l'humanité. + +58 +00:00:46,600 --> 00:00:47,720 +Mais ça, + +59 +00:00:47,720 --> 00:00:48,260 +ça soulève une autre + +60 +00:00:48,260 --> 00:00:48,820 +interrogation. + +61 +00:00:49,220 --> 00:00:49,980 +Est-ce que le fait + +62 +00:00:49,980 --> 00:00:51,120 +que cet objet soit + +63 +00:00:51,120 --> 00:00:52,420 +inédit induit que + +64 +00:00:52,420 --> 00:00:53,660 +notre rapport à lui + +65 +00:00:53,660 --> 00:00:55,000 +est aussi un rapport + +66 +00:00:55,000 --> 00:00:55,460 +inédit? 
+ +67 +00:00:55,460 --> 00:00:56,240 +Je veux dire, + +68 +00:00:56,240 --> 00:00:56,580 +est-ce que le + +69 +00:00:56,580 --> 00:00:57,280 +rapport qu'on a au + +70 +00:00:57,280 --> 00:00:57,920 +smartphone est + +71 +00:00:57,920 --> 00:00:58,660 +comparable à celui + +72 +00:00:58,660 --> 00:00:59,460 +qu'on entretenait à + +73 +00:00:59,460 --> 00:00:59,960 +d'autres objets + +74 +00:00:59,960 --> 00:01:01,500 +techniques comme la + +75 +00:01:01,500 --> 00:01:02,680 +voiture ou le + +76 +00:01:02,680 --> 00:01:03,120 +téléphone? + +77 +00:01:03,360 --> 00:01:05,660 +Il n'y a pas + +78 +00:01:05,660 --> 00:01:06,420 +d'équivalent en + +79 +00:01:06,420 --> 00:01:06,660 +fait. + +80 +00:01:06,880 --> 00:01:07,540 +Et donc cette espèce + +81 +00:01:07,540 --> 00:01:08,940 +de nouveauté dans la + +82 +00:01:08,940 --> 00:01:10,240 +relation à l'objet, + +83 +00:01:10,240 --> 00:01:10,760 +c'est fascinant et + +84 +00:01:10,760 --> 00:01:11,520 +terrifiant. + +85 +00:01:11,620 --> 00:01:12,440 +Parce qu'on a + +86 +00:01:12,440 --> 00:01:13,560 +l'impression, + +87 +00:01:13,560 --> 00:01:14,400 +comme le disent les + +88 +00:01:14,400 --> 00:01:15,000 +utilisateurs et les + +89 +00:01:15,000 --> 00:01:15,220 +services, + +90 +00:01:15,220 --> 00:01:16,080 +d'être dépendants de + +91 +00:01:16,080 --> 00:01:16,480 +cet objet, + +92 +00:01:16,860 --> 00:01:17,340 +d'induire en fait + +93 +00:01:17,340 --> 00:01:18,480 +une espèce de + +94 +00:01:18,480 --> 00:01:18,600 +relation, + +95 +00:01:18,600 --> 00:01:19,920 +de médiation avec le + +96 +00:01:19,920 --> 00:01:21,780 +monde qui rend de + +97 +00:01:21,780 --> 00:01:22,240 +l'ampleur et qui + +98 +00:01:22,240 --> 00:01:22,720 +amène aussi à des + +99 +00:01:22,720 --> 00:01:23,260 +formes de rejet. + +100 +00:01:23,940 --> 00:01:24,940 +Donc, + +101 +00:01:24,940 --> 00:01:26,560 +à objet inédit, + +102 +00:01:26,560 --> 00:01:27,800 +rapport inédit. 
+ +103 +00:01:28,020 --> 00:01:29,280 +Et ce rapport, + +104 +00:01:29,280 --> 00:01:29,880 +si j'en crois + +105 +00:01:29,880 --> 00:01:30,540 +Nicolas, + +106 +00:01:30,540 --> 00:01:31,800 +serait caractérisé + +107 +00:01:31,800 --> 00:01:33,400 +par un mélange de + +108 +00:01:33,400 --> 00:01:34,680 +dépendance et de + +109 +00:01:34,680 --> 00:01:35,140 +rejet. + +110 +00:01:35,780 --> 00:01:37,140 +Bon, en vrai, + +111 +00:01:37,140 --> 00:01:38,080 +il faudrait remonter + +112 +00:01:38,080 --> 00:01:39,320 +très très finement + +113 +00:01:39,320 --> 00:01:40,240 +toute l'histoire des + +114 +00:01:40,240 --> 00:01:41,480 +objets techniques et + +115 +00:01:41,480 --> 00:01:42,320 +de leur insertion + +116 +00:01:42,320 --> 00:01:42,860 +dans nos vies + +117 +00:01:42,900 --> 00:01:43,760 +pour déterminer si + +118 +00:01:43,760 --> 00:01:44,740 +ce rapport est + +119 +00:01:44,740 --> 00:01:45,740 +totalement inédit. + +120 +00:01:46,100 --> 00:01:46,920 +Mais j'ai + +121 +00:01:46,920 --> 00:01:47,560 +l'impression comme + +122 +00:01:47,560 --> 00:01:48,700 +ça que Nicolas ne se + +123 +00:01:48,700 --> 00:01:49,340 +trompe pas vraiment. + +124 +00:01:49,880 --> 00:01:50,520 +Pour autant que je + +125 +00:01:50,520 --> 00:01:51,140 +sache, + +126 +00:01:51,140 --> 00:01:52,060 +il y a eu plein de + +127 +00:01:52,060 --> 00:01:52,940 +discussions autour + +128 +00:01:52,940 --> 00:01:54,060 +de la voiture ou + +129 +00:01:54,060 --> 00:01:54,980 +même du téléphone. + +130 +00:01:55,340 --> 00:01:56,400 +Mais la dépendance + +131 +00:01:56,400 --> 00:01:57,460 +n'était pas du même + +132 +00:01:57,460 --> 00:01:57,780 +ordre. + +133 +00:01:57,780 --> 00:01:58,780 +Donc le rejet non + +134 +00:01:58,780 --> 00:01:59,380 +plus n'était pas du + +135 +00:01:59,380 --> 00:01:59,840 +même ordre. 
+ +136 +00:01:59,980 --> 00:02:00,880 +On peut adorer sa + +137 +00:02:00,880 --> 00:02:01,460 +bagnole, + +138 +00:02:01,460 --> 00:02:02,340 +en avoir besoin pour + +139 +00:02:02,340 --> 00:02:03,020 +plein de choses. + +140 +00:02:03,280 --> 00:02:04,680 +Et bien, le soir, + +141 +00:02:04,680 --> 00:02:05,380 +quand on va se + +142 +00:02:05,380 --> 00:02:05,800 +coucher, + +143 +00:02:05,800 --> 00:02:06,360 +on la laisse. + +144 +00:02:06,980 --> 00:02:07,800 +On ne l'a pas dans + +145 +00:02:07,800 --> 00:02:08,680 +la main quand on est + +146 +00:02:08,680 --> 00:02:09,140 +au lit, + +147 +00:02:09,140 --> 00:02:09,680 +on ne l'emmène pas + +148 +00:02:09,680 --> 00:02:10,480 +au chiottes. + +149 +00:02:10,860 --> 00:02:11,480 +On pouvait être + +150 +00:02:11,480 --> 00:02:13,100 +énervé par son môme + +151 +00:02:13,100 --> 00:02:13,800 +qui occupait la + +152 +00:02:13,800 --> 00:02:14,600 +ligne de téléphone + +153 +00:02:14,600 --> 00:02:15,360 +pendant une heure + +154 +00:02:15,360 --> 00:02:15,960 +chaque soir pour + +155 +00:02:15,960 --> 00:02:16,600 +discuter avec un + +156 +00:02:16,600 --> 00:02:16,900 +copain. + +157 +00:02:17,280 --> 00:02:17,940 +Mais ça ne + +158 +00:02:17,940 --> 00:02:18,980 +ressemblait pas à ce + +159 +00:02:18,980 --> 00:02:20,120 +qu'on peut ressentir + +160 +00:02:20,120 --> 00:02:21,340 +à voir ce même môme + +161 +00:02:21,340 --> 00:02:21,880 +aujourd'hui, + +162 +00:02:22,140 --> 00:02:23,180 +continuellement avec + +163 +00:02:23,180 --> 00:02:23,940 +son smartphone dans + +164 +00:02:23,940 --> 00:02:24,360 +la main, + +165 +00:02:24,360 --> 00:02:25,060 +comme si c'était une + +166 +00:02:25,060 --> 00:02:25,820 +sorte de pacemaker + +167 +00:02:25,820 --> 00:02:26,300 +externe, + +168 +00:02:26,340 --> 00:02:27,360 +comme si le lâcher + +169 +00:02:27,360 --> 00:02:28,000 +allait entraîner sa + +170 +00:02:28,000 --> 00:02:28,840 +mort immédiate. 
+ +171 +00:02:29,040 --> 00:02:29,280 +Bon, + +172 +00:02:29,280 --> 00:02:29,880 +je dis ça pour le + +173 +00:02:29,880 --> 00:02:30,320 +môme, + +174 +00:02:30,320 --> 00:02:31,140 +mais c'est évidemment + +175 +00:02:31,140 --> 00:02:31,760 +valable pour nous + +176 +00:02:31,760 --> 00:02:31,960 +aussi. + +177 +00:02:32,340 --> 00:02:33,460 +Donc, + +178 +00:02:33,460 --> 00:02:34,240 +rapport inédit. + +179 +00:02:34,240 --> 00:02:35,480 +D'accord. + +180 +00:02:35,480 --> 00:02:36,680 +Mais pourquoi a-t-on + +181 +00:02:36,680 --> 00:02:37,260 +l'impression qu'on + +182 +00:02:37,260 --> 00:02:38,220 +n'en sortira jamais? + +183 +00:02:38,860 --> 00:02:39,780 +Est-ce qu'il faut en + +184 +00:02:39,780 --> 00:02:40,660 +remettre la faute + +185 +00:02:40,660 --> 00:02:41,720 +sur les gens qui ont + +186 +00:02:41,720 --> 00:02:42,780 +créé cet outil + +187 +00:02:42,780 --> 00:02:43,500 +merveilleux et + +188 +00:02:43,500 --> 00:02:43,860 +diabolique, + +189 +00:02:43,860 --> 00:02:44,660 +et diabolique parce + +190 +00:02:44,660 --> 00:02:45,320 +que merveilleux? + +191 +00:02:46,340 --> 00:02:47,480 +Les économistes + +192 +00:02:47,480 --> 00:02:47,820 +parlent de + +193 +00:02:47,820 --> 00:02:48,500 +dépendance du + +194 +00:02:48,500 --> 00:02:48,820 +sentier. + +195 +00:02:48,860 --> 00:02:49,700 +C'est l'idée qu'on + +196 +00:02:49,700 --> 00:02:50,780 +est sur un sentier + +197 +00:02:50,780 --> 00:02:51,900 +qui a été établi, + +198 +00:02:51,900 --> 00:02:52,720 +soit volontairement + +199 +00:02:52,720 --> 00:02:54,240 +en marchant dessus, + +200 +00:02:54,240 --> 00:02:55,500 +soit en définissant + +201 +00:02:55,500 --> 00:02:56,040 +des bornes, + +202 +00:02:56,040 --> 00:02:56,820 +en définissant une + +203 +00:02:56,820 --> 00:02:57,420 +signalétique. 
+ diff --git a/tests/expected/split_subtitles/smartphone.mp3_20.vtt b/tests/expected/split_subtitles/smartphone.mp3_20.vtt new file mode 100644 index 0000000000000000000000000000000000000000..b880a5ca949532022a422bc316c7efd245866555 --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_20.vtt @@ -0,0 +1,611 @@ +WEBVTT + +00:00.380 --> 00:01.080 +C'est évident ce que + +00:01.080 --> 00:01.780 +dit Nicolas, + +00:01.780 --> 00:02.380 +mais je ne me + +00:02.380 --> 00:02.840 +l'étais jamais + +00:02.840 --> 00:03.620 +formulé comme ça. + +00:04.080 --> 00:05.000 +Ce qui fait la force + +00:05.000 --> 00:05.880 +du smartphone, + +00:05.880 --> 00:06.520 +c'est pas seulement + +00:06.520 --> 00:07.560 +l'accumulation des + +00:07.560 --> 00:07.920 +fonctions, + +00:08.320 --> 00:09.100 +mais la manière dont + +00:09.100 --> 00:10.320 +elles interagissent + +00:10.320 --> 00:10.880 +entre elles. + +00:10.960 --> 00:11.400 +Ce qui dit + +00:11.400 --> 00:11.780 +d'ailleurs sur la + +00:11.780 --> 00:12.120 +photo, + +00:12.120 --> 00:13.000 +c'est hyper convaincant. + +00:13.340 --> 00:14.340 +Alors évidemment, + +00:14.340 --> 00:15.160 +il faudrait ajouter + +00:15.160 --> 00:16.020 +les interfaces. + +00:16.220 --> 00:17.260 +L'écran tactile a + +00:17.260 --> 00:18.620 +été beaucoup très + +00:18.620 --> 00:19.360 +souvent mentionné. + +00:19.840 --> 00:20.520 +Mais bon, + +00:20.520 --> 00:20.960 +il faut dire qu'il + +00:20.960 --> 00:22.100 +profite aussi de 20 + +00:22.100 --> 00:22.920 +ans pendant lesquels + +00:22.920 --> 00:23.720 +les ordinateurs nous + +00:23.720 --> 00:24.500 +ont appris à cliquer + +00:24.500 --> 00:25.260 +sur des icônes. + +00:25.420 --> 00:26.660 +Sauf que le + +00:26.660 --> 00:27.620 +smartphone ajoute le + +00:27.620 --> 00:28.180 +toucher, + +00:28.180 --> 00:28.680 +ce qui rend le + +00:28.680 --> 00:30.220 +contact plus direct, + +00:30.220 --> 00:30.640 +plus sensible. 
+ +00:31.040 --> 00:31.700 +Et puis évidemment, + +00:31.700 --> 00:32.120 +il faudrait parler + +00:32.120 --> 00:32.480 +aussi des + +00:32.480 --> 00:33.180 +applications qui + +00:33.180 --> 00:33.960 +permettent de + +00:33.960 --> 00:34.800 +contourner le côté + +00:34.800 --> 00:35.780 +touffu de la + +00:35.780 --> 00:36.780 +navigation web pour + +00:36.780 --> 00:37.680 +aller directement au + +00:37.680 --> 00:37.820 +but. + +00:37.820 --> 00:39.420 +Bref, tout ça, + +00:39.420 --> 00:40.160 +ce sont les + +00:40.160 --> 00:40.960 +conditions qui + +00:40.960 --> 00:42.060 +permettent de créer + +00:42.060 --> 00:42.800 +cet objet dont + +00:42.800 --> 00:43.700 +Nicolas dit qu'il + +00:43.700 --> 00:43.880 +est + +00:43.880 --> 00:44.980 +vraisemblablement + +00:44.980 --> 00:45.700 +inédit dans + +00:45.700 --> 00:46.180 +l'histoire de + +00:46.180 --> 00:46.580 +l'humanité. + +00:46.600 --> 00:47.720 +Mais ça, + +00:47.720 --> 00:48.260 +ça soulève une autre + +00:48.260 --> 00:48.820 +interrogation. + +00:49.220 --> 00:49.980 +Est-ce que le fait + +00:49.980 --> 00:51.120 +que cet objet soit + +00:51.120 --> 00:52.420 +inédit induit que + +00:52.420 --> 00:53.660 +notre rapport à lui + +00:53.660 --> 00:55.000 +est aussi un rapport + +00:55.000 --> 00:55.460 +inédit? + +00:55.460 --> 00:56.240 +Je veux dire, + +00:56.240 --> 00:56.580 +est-ce que le + +00:56.580 --> 00:57.280 +rapport qu'on a au + +00:57.280 --> 00:57.920 +smartphone est + +00:57.920 --> 00:58.660 +comparable à celui + +00:58.660 --> 00:59.460 +qu'on entretenait à + +00:59.460 --> 00:59.960 +d'autres objets + +00:59.960 --> 01:01.500 +techniques comme la + +01:01.500 --> 01:02.680 +voiture ou le + +01:02.680 --> 01:03.120 +téléphone? + +01:03.360 --> 01:05.660 +Il n'y a pas + +01:05.660 --> 01:06.420 +d'équivalent en + +01:06.420 --> 01:06.660 +fait. 
+ +01:06.880 --> 01:07.540 +Et donc cette espèce + +01:07.540 --> 01:08.940 +de nouveauté dans la + +01:08.940 --> 01:10.240 +relation à l'objet, + +01:10.240 --> 01:10.760 +c'est fascinant et + +01:10.760 --> 01:11.520 +terrifiant. + +01:11.620 --> 01:12.440 +Parce qu'on a + +01:12.440 --> 01:13.560 +l'impression, + +01:13.560 --> 01:14.400 +comme le disent les + +01:14.400 --> 01:15.000 +utilisateurs et les + +01:15.000 --> 01:15.220 +services, + +01:15.220 --> 01:16.080 +d'être dépendants de + +01:16.080 --> 01:16.480 +cet objet, + +01:16.860 --> 01:17.340 +d'induire en fait + +01:17.340 --> 01:18.480 +une espèce de + +01:18.480 --> 01:18.600 +relation, + +01:18.600 --> 01:19.920 +de médiation avec le + +01:19.920 --> 01:21.780 +monde qui rend de + +01:21.780 --> 01:22.240 +l'ampleur et qui + +01:22.240 --> 01:22.720 +amène aussi à des + +01:22.720 --> 01:23.260 +formes de rejet. + +01:23.940 --> 01:24.940 +Donc, + +01:24.940 --> 01:26.560 +à objet inédit, + +01:26.560 --> 01:27.800 +rapport inédit. + +01:28.020 --> 01:29.280 +Et ce rapport, + +01:29.280 --> 01:29.880 +si j'en crois + +01:29.880 --> 01:30.540 +Nicolas, + +01:30.540 --> 01:31.800 +serait caractérisé + +01:31.800 --> 01:33.400 +par un mélange de + +01:33.400 --> 01:34.680 +dépendance et de + +01:34.680 --> 01:35.140 +rejet. + +01:35.780 --> 01:37.140 +Bon, en vrai, + +01:37.140 --> 01:38.080 +il faudrait remonter + +01:38.080 --> 01:39.320 +très très finement + +01:39.320 --> 01:40.240 +toute l'histoire des + +01:40.240 --> 01:41.480 +objets techniques et + +01:41.480 --> 01:42.320 +de leur insertion + +01:42.320 --> 01:42.860 +dans nos vies + +01:42.900 --> 01:43.760 +pour déterminer si + +01:43.760 --> 01:44.740 +ce rapport est + +01:44.740 --> 01:45.740 +totalement inédit. + +01:46.100 --> 01:46.920 +Mais j'ai + +01:46.920 --> 01:47.560 +l'impression comme + +01:47.560 --> 01:48.700 +ça que Nicolas ne se + +01:48.700 --> 01:49.340 +trompe pas vraiment. 
+ +01:49.880 --> 01:50.520 +Pour autant que je + +01:50.520 --> 01:51.140 +sache, + +01:51.140 --> 01:52.060 +il y a eu plein de + +01:52.060 --> 01:52.940 +discussions autour + +01:52.940 --> 01:54.060 +de la voiture ou + +01:54.060 --> 01:54.980 +même du téléphone. + +01:55.340 --> 01:56.400 +Mais la dépendance + +01:56.400 --> 01:57.460 +n'était pas du même + +01:57.460 --> 01:57.780 +ordre. + +01:57.780 --> 01:58.780 +Donc le rejet non + +01:58.780 --> 01:59.380 +plus n'était pas du + +01:59.380 --> 01:59.840 +même ordre. + +01:59.980 --> 02:00.880 +On peut adorer sa + +02:00.880 --> 02:01.460 +bagnole, + +02:01.460 --> 02:02.340 +en avoir besoin pour + +02:02.340 --> 02:03.020 +plein de choses. + +02:03.280 --> 02:04.680 +Et bien, le soir, + +02:04.680 --> 02:05.380 +quand on va se + +02:05.380 --> 02:05.800 +coucher, + +02:05.800 --> 02:06.360 +on la laisse. + +02:06.980 --> 02:07.800 +On ne l'a pas dans + +02:07.800 --> 02:08.680 +la main quand on est + +02:08.680 --> 02:09.140 +au lit, + +02:09.140 --> 02:09.680 +on ne l'emmène pas + +02:09.680 --> 02:10.480 +au chiottes. + +02:10.860 --> 02:11.480 +On pouvait être + +02:11.480 --> 02:13.100 +énervé par son môme + +02:13.100 --> 02:13.800 +qui occupait la + +02:13.800 --> 02:14.600 +ligne de téléphone + +02:14.600 --> 02:15.360 +pendant une heure + +02:15.360 --> 02:15.960 +chaque soir pour + +02:15.960 --> 02:16.600 +discuter avec un + +02:16.600 --> 02:16.900 +copain. 
+ +02:17.280 --> 02:17.940 +Mais ça ne + +02:17.940 --> 02:18.980 +ressemblait pas à ce + +02:18.980 --> 02:20.120 +qu'on peut ressentir + +02:20.120 --> 02:21.340 +à voir ce même môme + +02:21.340 --> 02:21.880 +aujourd'hui, + +02:22.140 --> 02:23.180 +continuellement avec + +02:23.180 --> 02:23.940 +son smartphone dans + +02:23.940 --> 02:24.360 +la main, + +02:24.360 --> 02:25.060 +comme si c'était une + +02:25.060 --> 02:25.820 +sorte de pacemaker + +02:25.820 --> 02:26.300 +externe, + +02:26.340 --> 02:27.360 +comme si le lâcher + +02:27.360 --> 02:28.000 +allait entraîner sa + +02:28.000 --> 02:28.840 +mort immédiate. + +02:29.040 --> 02:29.280 +Bon, + +02:29.280 --> 02:29.880 +je dis ça pour le + +02:29.880 --> 02:30.320 +môme, + +02:30.320 --> 02:31.140 +mais c'est évidemment + +02:31.140 --> 02:31.760 +valable pour nous + +02:31.760 --> 02:31.960 +aussi. + +02:32.340 --> 02:33.460 +Donc, + +02:33.460 --> 02:34.240 +rapport inédit. + +02:34.240 --> 02:35.480 +D'accord. + +02:35.480 --> 02:36.680 +Mais pourquoi a-t-on + +02:36.680 --> 02:37.260 +l'impression qu'on + +02:37.260 --> 02:38.220 +n'en sortira jamais? + +02:38.860 --> 02:39.780 +Est-ce qu'il faut en + +02:39.780 --> 02:40.660 +remettre la faute + +02:40.660 --> 02:41.720 +sur les gens qui ont + +02:41.720 --> 02:42.780 +créé cet outil + +02:42.780 --> 02:43.500 +merveilleux et + +02:43.500 --> 02:43.860 +diabolique, + +02:43.860 --> 02:44.660 +et diabolique parce + +02:44.660 --> 02:45.320 +que merveilleux? + +02:46.340 --> 02:47.480 +Les économistes + +02:47.480 --> 02:47.820 +parlent de + +02:47.820 --> 02:48.500 +dépendance du + +02:48.500 --> 02:48.820 +sentier. 
+ +02:48.860 --> 02:49.700 +C'est l'idée qu'on + +02:49.700 --> 02:50.780 +est sur un sentier + +02:50.780 --> 02:51.900 +qui a été établi, + +02:51.900 --> 02:52.720 +soit volontairement + +02:52.720 --> 02:54.240 +en marchant dessus, + +02:54.240 --> 02:55.500 +soit en définissant + +02:55.500 --> 02:56.040 +des bornes, + +02:56.040 --> 02:56.820 +en définissant une + +02:56.820 --> 02:57.420 +signalétique. + diff --git a/tests/expected/split_subtitles/smartphone.mp3_50.srt b/tests/expected/split_subtitles/smartphone.mp3_50.srt new file mode 100644 index 0000000000000000000000000000000000000000..5a9cbd960d9a7073f91c8d0412e7daf0f797d25c --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_50.srt @@ -0,0 +1,356 @@ +1 +00:00:00,380 --> 00:00:01,780 +C'est évident ce que dit Nicolas, + +2 +00:00:01,780 --> 00:00:03,620 +mais je ne me l'étais jamais formulé comme ça. + +3 +00:00:04,080 --> 00:00:05,880 +Ce qui fait la force du smartphone, + +4 +00:00:05,880 --> 00:00:07,920 +c'est pas seulement l'accumulation des fonctions, + +5 +00:00:08,320 --> 00:00:10,580 +mais la manière dont elles interagissent entre + +6 +00:00:10,580 --> 00:00:10,880 +elles. + +7 +00:00:10,960 --> 00:00:12,120 +Ce qui dit d'ailleurs sur la photo, + +8 +00:00:12,120 --> 00:00:13,000 +c'est hyper convaincant. + +9 +00:00:13,340 --> 00:00:14,340 +Alors évidemment, + +10 +00:00:14,340 --> 00:00:16,020 +il faudrait ajouter les interfaces. + +11 +00:00:16,220 --> 00:00:18,900 +L'écran tactile a été beaucoup très souvent + +12 +00:00:18,900 --> 00:00:19,360 +mentionné. + +13 +00:00:19,840 --> 00:00:20,520 +Mais bon, + +14 +00:00:20,520 --> 00:00:22,480 +il faut dire qu'il profite aussi de 20 ans pendant + +15 +00:00:22,480 --> 00:00:24,500 +lesquels les ordinateurs nous ont appris à cliquer + +16 +00:00:24,500 --> 00:00:25,260 +sur des icônes. 
+ +17 +00:00:25,420 --> 00:00:28,180 +Sauf que le smartphone ajoute le toucher, + +18 +00:00:28,180 --> 00:00:30,640 +ce qui rend le contact plus direct, plus sensible. + +19 +00:00:31,040 --> 00:00:31,700 +Et puis évidemment, + +20 +00:00:31,700 --> 00:00:33,180 +il faudrait parler aussi des applications qui + +21 +00:00:33,180 --> 00:00:35,780 +permettent de contourner le côté touffu de la + +22 +00:00:35,780 --> 00:00:37,820 +navigation web pour aller directement au but. + +23 +00:00:37,820 --> 00:00:39,420 +Bref, tout ça, + +24 +00:00:39,420 --> 00:00:42,380 +ce sont les conditions qui permettent de créer cet + +25 +00:00:42,380 --> 00:00:44,980 +objet dont Nicolas dit qu'il est vraisemblablement + +26 +00:00:44,980 --> 00:00:46,580 +inédit dans l'histoire de l'humanité. + +27 +00:00:46,600 --> 00:00:48,820 +Mais ça, ça soulève une autre interrogation. + +28 +00:00:49,220 --> 00:00:51,800 +Est-ce que le fait que cet objet soit inédit + +29 +00:00:51,800 --> 00:00:54,700 +induit que notre rapport à lui est aussi un + +30 +00:00:54,700 --> 00:00:55,460 +rapport inédit? + +31 +00:00:55,460 --> 00:00:56,240 +Je veux dire, + +32 +00:00:56,240 --> 00:00:57,920 +est-ce que le rapport qu'on a au smartphone est + +33 +00:00:57,920 --> 00:00:59,700 +comparable à celui qu'on entretenait à d'autres + +34 +00:00:59,700 --> 00:01:02,680 +objets techniques comme la voiture ou le + +35 +00:01:02,680 --> 00:01:03,120 +téléphone? + +36 +00:01:03,360 --> 00:01:06,660 +Il n'y a pas d'équivalent en fait. + +37 +00:01:06,880 --> 00:01:09,220 +Et donc cette espèce de nouveauté dans la relation + +38 +00:01:09,220 --> 00:01:11,520 +à l'objet, c'est fascinant et terrifiant. 
+ +39 +00:01:11,620 --> 00:01:13,560 +Parce qu'on a l'impression, + +40 +00:01:13,560 --> 00:01:15,220 +comme le disent les utilisateurs et les services, + +41 +00:01:15,220 --> 00:01:16,480 +d'être dépendants de cet objet, + +42 +00:01:16,860 --> 00:01:18,600 +d'induire en fait une espèce de relation, + +43 +00:01:18,600 --> 00:01:22,020 +de médiation avec le monde qui rend de l'ampleur + +44 +00:01:22,020 --> 00:01:23,260 +et qui amène aussi à des formes de rejet. + +45 +00:01:23,940 --> 00:01:27,800 +Donc, à objet inédit, rapport inédit. + +46 +00:01:28,020 --> 00:01:30,540 +Et ce rapport, si j'en crois Nicolas, + +47 +00:01:30,540 --> 00:01:34,540 +serait caractérisé par un mélange de dépendance et + +48 +00:01:34,540 --> 00:01:35,140 +de rejet. + +49 +00:01:35,780 --> 00:01:37,140 +Bon, en vrai, + +50 +00:01:37,140 --> 00:01:39,700 +il faudrait remonter très très finement toute + +51 +00:01:39,700 --> 00:01:41,840 +l'histoire des objets techniques et de leur + +52 +00:01:41,840 --> 00:01:42,860 +insertion dans nos vies + +53 +00:01:42,900 --> 00:01:45,300 +pour déterminer si ce rapport est totalement + +54 +00:01:45,300 --> 00:01:45,740 +inédit. + +55 +00:01:46,100 --> 00:01:48,700 +Mais j'ai l'impression comme ça que Nicolas ne se + +56 +00:01:48,700 --> 00:01:49,340 +trompe pas vraiment. + +57 +00:01:49,880 --> 00:01:51,140 +Pour autant que je sache, + +58 +00:01:51,140 --> 00:01:53,520 +il y a eu plein de discussions autour de la + +59 +00:01:53,520 --> 00:01:54,980 +voiture ou même du téléphone. + +60 +00:01:55,340 --> 00:01:57,780 +Mais la dépendance n'était pas du même ordre. + +61 +00:01:57,780 --> 00:01:59,840 +Donc le rejet non plus n'était pas du même ordre. + +62 +00:01:59,980 --> 00:02:01,460 +On peut adorer sa bagnole, + +63 +00:02:01,460 --> 00:02:03,020 +en avoir besoin pour plein de choses. + +64 +00:02:03,280 --> 00:02:05,800 +Et bien, le soir, quand on va se coucher, + +65 +00:02:05,800 --> 00:02:06,360 +on la laisse. 
+ +66 +00:02:06,980 --> 00:02:09,140 +On ne l'a pas dans la main quand on est au lit, + +67 +00:02:09,140 --> 00:02:10,480 +on ne l'emmène pas au chiottes. + +68 +00:02:10,860 --> 00:02:13,760 +On pouvait être énervé par son môme qui occupait + +69 +00:02:13,760 --> 00:02:15,540 +la ligne de téléphone pendant une heure chaque + +70 +00:02:15,540 --> 00:02:16,900 +soir pour discuter avec un copain. + +71 +00:02:17,280 --> 00:02:19,480 +Mais ça ne ressemblait pas à ce qu'on peut + +72 +00:02:19,480 --> 00:02:21,880 +ressentir à voir ce même môme aujourd'hui, + +73 +00:02:22,140 --> 00:02:24,360 +continuellement avec son smartphone dans la main, + +74 +00:02:24,360 --> 00:02:26,300 +comme si c'était une sorte de pacemaker externe, + +75 +00:02:26,340 --> 00:02:28,220 +comme si le lâcher allait entraîner sa mort + +76 +00:02:28,220 --> 00:02:28,840 +immédiate. + +77 +00:02:29,040 --> 00:02:30,320 +Bon, je dis ça pour le môme, + +78 +00:02:30,320 --> 00:02:31,960 +mais c'est évidemment valable pour nous aussi. + +79 +00:02:32,340 --> 00:02:35,480 +Donc, rapport inédit. D'accord. + +80 +00:02:35,480 --> 00:02:37,440 +Mais pourquoi a-t-on l'impression qu'on n'en + +81 +00:02:37,440 --> 00:02:38,220 +sortira jamais? + +82 +00:02:38,860 --> 00:02:41,280 +Est-ce qu'il faut en remettre la faute sur les + +83 +00:02:41,280 --> 00:02:43,500 +gens qui ont créé cet outil merveilleux et + +84 +00:02:43,500 --> 00:02:45,320 +diabolique, et diabolique parce que merveilleux? + +85 +00:02:46,340 --> 00:02:48,820 +Les économistes parlent de dépendance du sentier. + +86 +00:02:48,860 --> 00:02:51,120 +C'est l'idée qu'on est sur un sentier qui a été + +87 +00:02:51,120 --> 00:02:54,240 +établi, soit volontairement en marchant dessus, + +88 +00:02:54,240 --> 00:02:56,040 +soit en définissant des bornes, + +89 +00:02:56,040 --> 00:02:57,420 +en définissant une signalétique. 
+ diff --git a/tests/expected/split_subtitles/smartphone.mp3_50.vtt b/tests/expected/split_subtitles/smartphone.mp3_50.vtt new file mode 100644 index 0000000000000000000000000000000000000000..68d9b01306781ba2fcb81166b675053824255222 --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_50.vtt @@ -0,0 +1,269 @@ +WEBVTT + +00:00.380 --> 00:01.780 +C'est évident ce que dit Nicolas, + +00:01.780 --> 00:03.620 +mais je ne me l'étais jamais formulé comme ça. + +00:04.080 --> 00:05.880 +Ce qui fait la force du smartphone, + +00:05.880 --> 00:07.920 +c'est pas seulement l'accumulation des fonctions, + +00:08.320 --> 00:10.580 +mais la manière dont elles interagissent entre + +00:10.580 --> 00:10.880 +elles. + +00:10.960 --> 00:12.120 +Ce qui dit d'ailleurs sur la photo, + +00:12.120 --> 00:13.000 +c'est hyper convaincant. + +00:13.340 --> 00:14.340 +Alors évidemment, + +00:14.340 --> 00:16.020 +il faudrait ajouter les interfaces. + +00:16.220 --> 00:18.900 +L'écran tactile a été beaucoup très souvent + +00:18.900 --> 00:19.360 +mentionné. + +00:19.840 --> 00:20.520 +Mais bon, + +00:20.520 --> 00:22.480 +il faut dire qu'il profite aussi de 20 ans pendant + +00:22.480 --> 00:24.500 +lesquels les ordinateurs nous ont appris à cliquer + +00:24.500 --> 00:25.260 +sur des icônes. + +00:25.420 --> 00:28.180 +Sauf que le smartphone ajoute le toucher, + +00:28.180 --> 00:30.640 +ce qui rend le contact plus direct, plus sensible. + +00:31.040 --> 00:31.700 +Et puis évidemment, + +00:31.700 --> 00:33.180 +il faudrait parler aussi des applications qui + +00:33.180 --> 00:35.780 +permettent de contourner le côté touffu de la + +00:35.780 --> 00:37.820 +navigation web pour aller directement au but. + +00:37.820 --> 00:39.420 +Bref, tout ça, + +00:39.420 --> 00:42.380 +ce sont les conditions qui permettent de créer cet + +00:42.380 --> 00:44.980 +objet dont Nicolas dit qu'il est vraisemblablement + +00:44.980 --> 00:46.580 +inédit dans l'histoire de l'humanité. 
+ +00:46.600 --> 00:48.820 +Mais ça, ça soulève une autre interrogation. + +00:49.220 --> 00:51.800 +Est-ce que le fait que cet objet soit inédit + +00:51.800 --> 00:54.700 +induit que notre rapport à lui est aussi un + +00:54.700 --> 00:55.460 +rapport inédit? + +00:55.460 --> 00:56.240 +Je veux dire, + +00:56.240 --> 00:57.920 +est-ce que le rapport qu'on a au smartphone est + +00:57.920 --> 00:59.700 +comparable à celui qu'on entretenait à d'autres + +00:59.700 --> 01:02.680 +objets techniques comme la voiture ou le + +01:02.680 --> 01:03.120 +téléphone? + +01:03.360 --> 01:06.660 +Il n'y a pas d'équivalent en fait. + +01:06.880 --> 01:09.220 +Et donc cette espèce de nouveauté dans la relation + +01:09.220 --> 01:11.520 +à l'objet, c'est fascinant et terrifiant. + +01:11.620 --> 01:13.560 +Parce qu'on a l'impression, + +01:13.560 --> 01:15.220 +comme le disent les utilisateurs et les services, + +01:15.220 --> 01:16.480 +d'être dépendants de cet objet, + +01:16.860 --> 01:18.600 +d'induire en fait une espèce de relation, + +01:18.600 --> 01:22.020 +de médiation avec le monde qui rend de l'ampleur + +01:22.020 --> 01:23.260 +et qui amène aussi à des formes de rejet. + +01:23.940 --> 01:27.800 +Donc, à objet inédit, rapport inédit. + +01:28.020 --> 01:30.540 +Et ce rapport, si j'en crois Nicolas, + +01:30.540 --> 01:34.540 +serait caractérisé par un mélange de dépendance et + +01:34.540 --> 01:35.140 +de rejet. + +01:35.780 --> 01:37.140 +Bon, en vrai, + +01:37.140 --> 01:39.700 +il faudrait remonter très très finement toute + +01:39.700 --> 01:41.840 +l'histoire des objets techniques et de leur + +01:41.840 --> 01:42.860 +insertion dans nos vies + +01:42.900 --> 01:45.300 +pour déterminer si ce rapport est totalement + +01:45.300 --> 01:45.740 +inédit. + +01:46.100 --> 01:48.700 +Mais j'ai l'impression comme ça que Nicolas ne se + +01:48.700 --> 01:49.340 +trompe pas vraiment. 
+ +01:49.880 --> 01:51.140 +Pour autant que je sache, + +01:51.140 --> 01:53.520 +il y a eu plein de discussions autour de la + +01:53.520 --> 01:54.980 +voiture ou même du téléphone. + +01:55.340 --> 01:57.780 +Mais la dépendance n'était pas du même ordre. + +01:57.780 --> 01:59.840 +Donc le rejet non plus n'était pas du même ordre. + +01:59.980 --> 02:01.460 +On peut adorer sa bagnole, + +02:01.460 --> 02:03.020 +en avoir besoin pour plein de choses. + +02:03.280 --> 02:05.800 +Et bien, le soir, quand on va se coucher, + +02:05.800 --> 02:06.360 +on la laisse. + +02:06.980 --> 02:09.140 +On ne l'a pas dans la main quand on est au lit, + +02:09.140 --> 02:10.480 +on ne l'emmène pas au chiottes. + +02:10.860 --> 02:13.760 +On pouvait être énervé par son môme qui occupait + +02:13.760 --> 02:15.540 +la ligne de téléphone pendant une heure chaque + +02:15.540 --> 02:16.900 +soir pour discuter avec un copain. + +02:17.280 --> 02:19.480 +Mais ça ne ressemblait pas à ce qu'on peut + +02:19.480 --> 02:21.880 +ressentir à voir ce même môme aujourd'hui, + +02:22.140 --> 02:24.360 +continuellement avec son smartphone dans la main, + +02:24.360 --> 02:26.300 +comme si c'était une sorte de pacemaker externe, + +02:26.340 --> 02:28.220 +comme si le lâcher allait entraîner sa mort + +02:28.220 --> 02:28.840 +immédiate. + +02:29.040 --> 02:30.320 +Bon, je dis ça pour le môme, + +02:30.320 --> 02:31.960 +mais c'est évidemment valable pour nous aussi. + +02:32.340 --> 02:35.480 +Donc, rapport inédit. D'accord. + +02:35.480 --> 02:37.440 +Mais pourquoi a-t-on l'impression qu'on n'en + +02:37.440 --> 02:38.220 +sortira jamais? + +02:38.860 --> 02:41.280 +Est-ce qu'il faut en remettre la faute sur les + +02:41.280 --> 02:43.500 +gens qui ont créé cet outil merveilleux et + +02:43.500 --> 02:45.320 +diabolique, et diabolique parce que merveilleux? + +02:46.340 --> 02:48.820 +Les économistes parlent de dépendance du sentier. 
+ +02:48.860 --> 02:51.120 +C'est l'idée qu'on est sur un sentier qui a été + +02:51.120 --> 02:54.240 +établi, soit volontairement en marchant dessus, + +02:54.240 --> 02:56.040 +soit en définissant des bornes, + +02:56.040 --> 02:57.420 +en définissant une signalétique. + diff --git a/tests/expected/split_subtitles/smartphone.mp3_6.srt b/tests/expected/split_subtitles/smartphone.mp3_6.srt new file mode 100644 index 0000000000000000000000000000000000000000..e03246f80d846a479b5404c598c34d50b8266e96 --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_6.srt @@ -0,0 +1,2024 @@ +1 +00:00:00,380 --> 00:00:00,580 +C'est + +2 +00:00:00,580 --> 00:00:00,880 +évident + +3 +00:00:00,880 --> 00:00:01,080 +ce que + +4 +00:00:01,080 --> 00:00:01,200 +dit + +5 +00:00:01,200 --> 00:00:01,780 +Nicolas, + +6 +00:00:01,780 --> 00:00:01,900 +mais + +7 +00:00:01,900 --> 00:00:02,340 +je ne + +8 +00:00:02,340 --> 00:00:02,380 +me + +9 +00:00:02,380 --> 00:00:02,580 +l'étais + +10 +00:00:02,580 --> 00:00:02,840 +jamais + +11 +00:00:02,840 --> 00:00:03,260 +formulé + +12 +00:00:03,260 --> 00:00:03,420 +comme + +13 +00:00:03,420 --> 00:00:03,620 +ça. 
+ +14 +00:00:04,080 --> 00:00:04,340 +Ce qui + +15 +00:00:04,340 --> 00:00:04,480 +fait + +16 +00:00:04,480 --> 00:00:04,660 +la + +17 +00:00:04,660 --> 00:00:05,000 +force + +18 +00:00:05,000 --> 00:00:05,200 +du + +19 +00:00:05,200 --> 00:00:05,880 +smartphone, + +20 +00:00:05,880 --> 00:00:06,120 +c'est + +21 +00:00:06,120 --> 00:00:06,260 +pas + +22 +00:00:06,260 --> 00:00:06,520 +seulement + +23 +00:00:06,520 --> 00:00:07,380 +l'accumulation + +24 +00:00:07,380 --> 00:00:07,560 +des + +25 +00:00:07,560 --> 00:00:07,920 +fonctions, + +26 +00:00:08,320 --> 00:00:08,440 +mais + +27 +00:00:08,440 --> 00:00:08,600 +la + +28 +00:00:08,600 --> 00:00:08,900 +manière + +29 +00:00:08,900 --> 00:00:09,100 +dont + +30 +00:00:09,100 --> 00:00:09,480 +elles + +31 +00:00:09,480 --> 00:00:10,320 +interagissent + +32 +00:00:10,320 --> 00:00:10,580 +entre + +33 +00:00:10,580 --> 00:00:10,880 +elles. + +34 +00:00:10,960 --> 00:00:11,220 +Ce qui + +35 +00:00:11,220 --> 00:00:11,400 +dit + +36 +00:00:11,400 --> 00:00:11,560 +d'ailleurs + +37 +00:00:11,560 --> 00:00:11,780 +sur la + +38 +00:00:11,780 --> 00:00:12,120 +photo, + +39 +00:00:12,120 --> 00:00:12,200 +c'est + +40 +00:00:12,200 --> 00:00:12,420 +hyper + +41 +00:00:12,420 --> 00:00:13,000 +convaincant. + +42 +00:00:13,340 --> 00:00:13,620 +Alors + +43 +00:00:13,620 --> 00:00:14,340 +évidemment, + +44 +00:00:14,340 --> 00:00:14,380 +il + +45 +00:00:14,380 --> 00:00:14,740 +faudrait + +46 +00:00:14,740 --> 00:00:15,160 +ajouter + +47 +00:00:15,160 --> 00:00:15,520 +les + +48 +00:00:15,520 --> 00:00:16,020 +interfaces. + +49 +00:00:16,220 --> 00:00:16,700 +L'écran + +50 +00:00:16,700 --> 00:00:17,060 +tactile + +51 +00:00:17,060 --> 00:00:17,880 +a été + +52 +00:00:17,880 --> 00:00:18,280 +beaucoup + +53 +00:00:18,280 --> 00:00:18,620 +très + +54 +00:00:18,620 --> 00:00:18,900 +souvent + +55 +00:00:18,900 --> 00:00:19,360 +mentionné. 
+ +56 +00:00:19,840 --> 00:00:20,220 +Mais + +57 +00:00:20,220 --> 00:00:20,520 +bon, + +58 +00:00:20,520 --> 00:00:20,600 +il + +59 +00:00:20,600 --> 00:00:20,700 +faut + +60 +00:00:20,700 --> 00:00:20,840 +dire + +61 +00:00:20,840 --> 00:00:20,960 +qu'il + +62 +00:00:20,960 --> 00:00:21,260 +profite + +63 +00:00:21,260 --> 00:00:21,680 +aussi + +64 +00:00:21,680 --> 00:00:22,100 +de 20 + +65 +00:00:22,100 --> 00:00:22,320 +ans + +66 +00:00:22,320 --> 00:00:22,480 +pendant + +67 +00:00:22,480 --> 00:00:22,920 +lesquels + +68 +00:00:22,920 --> 00:00:23,040 +les + +69 +00:00:23,040 --> 00:00:23,540 +ordinateurs + +70 +00:00:23,540 --> 00:00:23,720 +nous + +71 +00:00:23,720 --> 00:00:23,820 +ont + +72 +00:00:23,820 --> 00:00:24,100 +appris + +73 +00:00:24,100 --> 00:00:24,240 +à + +74 +00:00:24,240 --> 00:00:24,500 +cliquer + +75 +00:00:24,500 --> 00:00:24,660 +sur + +76 +00:00:24,660 --> 00:00:24,940 +des + +77 +00:00:24,940 --> 00:00:25,260 +icônes. + +78 +00:00:25,420 --> 00:00:25,760 +Sauf + +79 +00:00:25,760 --> 00:00:26,660 +que le + +80 +00:00:26,660 --> 00:00:27,060 +smartphone + +81 +00:00:27,060 --> 00:00:27,440 +ajoute + +82 +00:00:27,440 --> 00:00:27,620 +le + +83 +00:00:27,620 --> 00:00:28,180 +toucher, + +84 +00:00:28,180 --> 00:00:28,280 +ce qui + +85 +00:00:28,280 --> 00:00:28,480 +rend + +86 +00:00:28,480 --> 00:00:28,680 +le + +87 +00:00:28,680 --> 00:00:29,100 +contact + +88 +00:00:29,100 --> 00:00:29,460 +plus + +89 +00:00:29,460 --> 00:00:30,220 +direct, + +90 +00:00:30,220 --> 00:00:30,260 +plus + +91 +00:00:30,260 --> 00:00:30,640 +sensible. 
+ +92 +00:00:31,040 --> 00:00:31,220 +Et + +93 +00:00:31,220 --> 00:00:31,360 +puis + +94 +00:00:31,360 --> 00:00:31,700 +évidemment, + +95 +00:00:31,700 --> 00:00:31,740 +il + +96 +00:00:31,740 --> 00:00:31,940 +faudrait + +97 +00:00:31,940 --> 00:00:32,120 +parler + +98 +00:00:32,120 --> 00:00:32,340 +aussi + +99 +00:00:32,340 --> 00:00:32,480 +des + +100 +00:00:32,480 --> 00:00:32,900 +applications + +101 +00:00:32,900 --> 00:00:33,180 +qui + +102 +00:00:33,180 --> 00:00:33,740 +permettent + +103 +00:00:33,740 --> 00:00:33,960 +de + +104 +00:00:33,960 --> 00:00:34,420 +contourner + +105 +00:00:34,420 --> 00:00:34,520 +le + +106 +00:00:34,520 --> 00:00:34,800 +côté + +107 +00:00:34,800 --> 00:00:35,320 +touffu + +108 +00:00:35,320 --> 00:00:35,780 +de la + +109 +00:00:35,780 --> 00:00:36,240 +navigation + +110 +00:00:36,240 --> 00:00:36,600 +web + +111 +00:00:36,600 --> 00:00:36,780 +pour + +112 +00:00:36,780 --> 00:00:36,980 +aller + +113 +00:00:36,980 --> 00:00:37,520 +directement + +114 +00:00:37,520 --> 00:00:37,680 +au + +115 +00:00:37,680 --> 00:00:37,820 +but. 
+ +116 +00:00:37,820 --> 00:00:38,760 +Bref, + +117 +00:00:38,760 --> 00:00:38,980 +tout + +118 +00:00:38,980 --> 00:00:39,420 +ça, + +119 +00:00:39,420 --> 00:00:39,880 +ce sont + +120 +00:00:39,880 --> 00:00:40,160 +les + +121 +00:00:40,160 --> 00:00:40,680 +conditions + +122 +00:00:40,680 --> 00:00:40,960 +qui + +123 +00:00:40,960 --> 00:00:41,460 +permettent + +124 +00:00:41,460 --> 00:00:41,600 +de + +125 +00:00:41,600 --> 00:00:42,060 +créer + +126 +00:00:42,060 --> 00:00:42,380 +cet + +127 +00:00:42,380 --> 00:00:42,600 +objet + +128 +00:00:42,600 --> 00:00:42,800 +dont + +129 +00:00:42,800 --> 00:00:43,260 +Nicolas + +130 +00:00:43,260 --> 00:00:43,500 +dit + +131 +00:00:43,500 --> 00:00:43,700 +qu'il + +132 +00:00:43,700 --> 00:00:43,880 +est + +133 +00:00:43,880 --> 00:00:44,980 +vraisemblablement + +134 +00:00:44,980 --> 00:00:45,380 +inédit + +135 +00:00:45,380 --> 00:00:45,700 +dans + +136 +00:00:45,700 --> 00:00:45,980 +l'histoire + +137 +00:00:45,980 --> 00:00:46,180 +de + +138 +00:00:46,180 --> 00:00:46,580 +l'humanité. + +139 +00:00:46,600 --> 00:00:47,240 +Mais + +140 +00:00:47,240 --> 00:00:47,720 +ça, + +141 +00:00:47,720 --> 00:00:47,840 +ça soulève + +142 +00:00:47,840 --> 00:00:48,020 +une + +143 +00:00:48,020 --> 00:00:48,260 +autre + +144 +00:00:48,260 --> 00:00:48,820 +interrogation. 
+ +145 +00:00:49,220 --> 00:00:49,620 +Est-ce + +146 +00:00:49,620 --> 00:00:49,820 +que le + +147 +00:00:49,820 --> 00:00:49,980 +fait + +148 +00:00:49,980 --> 00:00:50,140 +que + +149 +00:00:50,140 --> 00:00:50,320 +cet + +150 +00:00:50,320 --> 00:00:50,660 +objet + +151 +00:00:50,660 --> 00:00:51,120 +soit + +152 +00:00:51,120 --> 00:00:51,800 +inédit + +153 +00:00:51,800 --> 00:00:52,320 +induit + +154 +00:00:52,320 --> 00:00:52,420 +que + +155 +00:00:52,420 --> 00:00:52,720 +notre + +156 +00:00:52,720 --> 00:00:53,280 +rapport + +157 +00:00:53,280 --> 00:00:53,660 +à lui + +158 +00:00:53,660 --> 00:00:54,020 +est + +159 +00:00:54,020 --> 00:00:54,540 +aussi + +160 +00:00:54,540 --> 00:00:54,700 +un + +161 +00:00:54,700 --> 00:00:55,000 +rapport + +162 +00:00:55,000 --> 00:00:55,460 +inédit? + +163 +00:00:55,460 --> 00:00:55,900 +Je + +164 +00:00:55,900 --> 00:00:56,000 +veux + +165 +00:00:56,000 --> 00:00:56,240 +dire, + +166 +00:00:56,240 --> 00:00:56,360 +est-ce + +167 +00:00:56,360 --> 00:00:56,580 +que le + +168 +00:00:56,580 --> 00:00:56,880 +rapport + +169 +00:00:56,880 --> 00:00:57,040 +qu'on + +170 +00:00:57,040 --> 00:00:57,280 +a au + +171 +00:00:57,280 --> 00:00:57,600 +smartphone + +172 +00:00:57,600 --> 00:00:57,920 +est + +173 +00:00:57,920 --> 00:00:58,240 +comparable + +174 +00:00:58,240 --> 00:00:58,480 +à + +175 +00:00:58,480 --> 00:00:58,660 +celui + +176 +00:00:58,660 --> 00:00:58,900 +qu'on + +177 +00:00:58,900 --> 00:00:59,320 +entretenait + +178 +00:00:59,320 --> 00:00:59,460 +à + +179 +00:00:59,460 --> 00:00:59,700 +d'autres + +180 +00:00:59,700 --> 00:00:59,960 +objets + +181 +00:00:59,960 --> 00:01:00,460 +techniques + +182 +00:01:00,460 --> 00:01:00,880 +comme + +183 +00:01:00,880 --> 00:01:01,500 +la + +184 +00:01:01,500 --> 00:01:02,060 +voiture + +185 +00:01:02,060 --> 00:01:02,680 +ou le + +186 +00:01:02,680 --> 00:01:03,120 +téléphone? 
+ +187 +00:01:03,360 --> 00:01:05,480 +Il n'y + +188 +00:01:05,480 --> 00:01:05,660 +a pas + +189 +00:01:05,660 --> 00:01:06,220 +d'équivalent + +190 +00:01:06,220 --> 00:01:06,420 +en + +191 +00:01:06,420 --> 00:01:06,660 +fait. + +192 +00:01:06,880 --> 00:01:06,980 +Et + +193 +00:01:06,980 --> 00:01:07,080 +donc + +194 +00:01:07,080 --> 00:01:07,280 +cette + +195 +00:01:07,280 --> 00:01:07,540 +espèce + +196 +00:01:07,540 --> 00:01:07,680 +de + +197 +00:01:07,680 --> 00:01:08,480 +nouveauté + +198 +00:01:08,480 --> 00:01:08,660 +dans + +199 +00:01:08,660 --> 00:01:08,940 +la + +200 +00:01:08,940 --> 00:01:09,220 +relation + +201 +00:01:09,220 --> 00:01:09,380 +à + +202 +00:01:09,380 --> 00:01:10,240 +l'objet, + +203 +00:01:10,240 --> 00:01:10,380 +c'est + +204 +00:01:10,380 --> 00:01:10,640 +fascinant + +205 +00:01:10,640 --> 00:01:10,760 +et + +206 +00:01:10,760 --> 00:01:11,520 +terrifiant. + +207 +00:01:11,620 --> 00:01:11,860 +Parce + +208 +00:01:11,860 --> 00:01:12,120 +qu'on + +209 +00:01:12,120 --> 00:01:12,440 +a + +210 +00:01:12,440 --> 00:01:13,560 +l'impression, + +211 +00:01:13,560 --> 00:01:13,840 +comme + +212 +00:01:13,840 --> 00:01:14,000 +le + +213 +00:01:14,000 --> 00:01:14,200 +disent + +214 +00:01:14,200 --> 00:01:14,400 +les + +215 +00:01:14,400 --> 00:01:14,840 +utilisateurs + +216 +00:01:14,840 --> 00:01:15,000 +et les + +217 +00:01:15,000 --> 00:01:15,220 +services, + +218 +00:01:15,220 --> 00:01:15,420 +d'être + +219 +00:01:15,420 --> 00:01:15,960 +dépendants + +220 +00:01:15,960 --> 00:01:16,260 +de cet + +221 +00:01:16,260 --> 00:01:16,480 +objet, + +222 +00:01:16,860 --> 00:01:17,080 +d'induire + +223 +00:01:17,080 --> 00:01:17,240 +en + +224 +00:01:17,240 --> 00:01:17,340 +fait + +225 +00:01:17,340 --> 00:01:17,520 +une + +226 +00:01:17,520 --> 00:01:17,880 +espèce + +227 +00:01:17,880 --> 00:01:18,480 +de + +228 +00:01:18,480 --> 00:01:18,600 +relation, + +229 +00:01:18,600 --> 00:01:18,940 +de + +230 +00:01:18,940 --> 00:01:19,520 
+médiation + +231 +00:01:19,520 --> 00:01:19,740 +avec + +232 +00:01:19,740 --> 00:01:19,920 +le + +233 +00:01:19,920 --> 00:01:20,640 +monde + +234 +00:01:20,640 --> 00:01:21,100 +qui + +235 +00:01:21,100 --> 00:01:21,640 +rend + +236 +00:01:21,640 --> 00:01:21,780 +de + +237 +00:01:21,780 --> 00:01:22,020 +l'ampleur + +238 +00:01:22,020 --> 00:01:22,240 +et qui + +239 +00:01:22,240 --> 00:01:22,360 +amène + +240 +00:01:22,360 --> 00:01:22,560 +aussi + +241 +00:01:22,560 --> 00:01:22,720 +à des + +242 +00:01:22,720 --> 00:01:22,900 +formes + +243 +00:01:22,900 --> 00:01:23,020 +de + +244 +00:01:23,020 --> 00:01:23,260 +rejet. + +245 +00:01:23,940 --> 00:01:24,940 +Donc, + +246 +00:01:24,940 --> 00:01:24,980 +à + +247 +00:01:24,980 --> 00:01:25,360 +objet + +248 +00:01:25,360 --> 00:01:26,560 +inédit, + +249 +00:01:26,560 --> 00:01:27,000 +rapport + +250 +00:01:27,000 --> 00:01:27,800 +inédit. + +251 +00:01:28,020 --> 00:01:28,860 +Et ce + +252 +00:01:28,860 --> 00:01:29,280 +rapport, + +253 +00:01:29,280 --> 00:01:29,560 +si + +254 +00:01:29,560 --> 00:01:29,840 +j'en + +255 +00:01:29,840 --> 00:01:29,880 +crois + +256 +00:01:29,880 --> 00:01:30,540 +Nicolas, + +257 +00:01:30,540 --> 00:01:30,940 +serait + +258 +00:01:30,940 --> 00:01:31,800 +caractérisé + +259 +00:01:31,800 --> 00:01:32,440 +par un + +260 +00:01:32,440 --> 00:01:32,980 +mélange + +261 +00:01:32,980 --> 00:01:33,400 +de + +262 +00:01:33,400 --> 00:01:34,240 +dépendance + +263 +00:01:34,240 --> 00:01:34,680 +et de + +264 +00:01:34,680 --> 00:01:35,140 +rejet. 
+ +265 +00:01:35,780 --> 00:01:36,380 +Bon, + +266 +00:01:36,380 --> 00:01:36,520 +en + +267 +00:01:36,520 --> 00:01:37,140 +vrai, + +268 +00:01:37,140 --> 00:01:37,180 +il + +269 +00:01:37,180 --> 00:01:37,580 +faudrait + +270 +00:01:37,580 --> 00:01:38,080 +remonter + +271 +00:01:38,080 --> 00:01:38,580 +très + +272 +00:01:38,580 --> 00:01:38,700 +très + +273 +00:01:38,700 --> 00:01:39,320 +finement + +274 +00:01:39,320 --> 00:01:39,700 +toute + +275 +00:01:39,700 --> 00:01:40,060 +l'histoire + +276 +00:01:40,060 --> 00:01:40,240 +des + +277 +00:01:40,240 --> 00:01:40,480 +objets + +278 +00:01:40,480 --> 00:01:41,020 +techniques + +279 +00:01:41,020 --> 00:01:41,680 +et de + +280 +00:01:41,680 --> 00:01:41,840 +leur + +281 +00:01:41,840 --> 00:01:42,320 +insertion + +282 +00:01:42,320 --> 00:01:42,480 +dans + +283 +00:01:42,480 --> 00:01:42,660 +nos + +284 +00:01:42,660 --> 00:01:42,860 +vies + +285 +00:01:42,900 --> 00:01:43,060 +pour + +286 +00:01:43,060 --> 00:01:43,660 +déterminer + +287 +00:01:43,660 --> 00:01:43,940 +si ce + +288 +00:01:43,940 --> 00:01:44,260 +rapport + +289 +00:01:44,260 --> 00:01:44,740 +est + +290 +00:01:44,740 --> 00:01:45,300 +totalement + +291 +00:01:45,300 --> 00:01:45,740 +inédit. + +292 +00:01:46,100 --> 00:01:46,360 +Mais + +293 +00:01:46,360 --> 00:01:46,920 +j'ai + +294 +00:01:46,920 --> 00:01:47,360 +l'impression + +295 +00:01:47,360 --> 00:01:47,560 +comme + +296 +00:01:47,560 --> 00:01:47,960 +ça que + +297 +00:01:47,960 --> 00:01:48,460 +Nicolas + +298 +00:01:48,460 --> 00:01:48,700 +ne se + +299 +00:01:48,700 --> 00:01:48,880 +trompe + +300 +00:01:48,880 --> 00:01:49,080 +pas + +301 +00:01:49,080 --> 00:01:49,340 +vraiment. 
+ +302 +00:01:49,880 --> 00:01:50,080 +Pour + +303 +00:01:50,080 --> 00:01:50,240 +autant + +304 +00:01:50,240 --> 00:01:50,520 +que je + +305 +00:01:50,520 --> 00:01:51,140 +sache, + +306 +00:01:51,140 --> 00:01:51,360 +il y a + +307 +00:01:51,360 --> 00:01:51,680 +eu + +308 +00:01:51,680 --> 00:01:51,880 +plein + +309 +00:01:51,880 --> 00:01:52,060 +de + +310 +00:01:52,060 --> 00:01:52,600 +discussions + +311 +00:01:52,600 --> 00:01:52,940 +autour + +312 +00:01:52,940 --> 00:01:53,520 +de la + +313 +00:01:53,520 --> 00:01:53,860 +voiture + +314 +00:01:53,860 --> 00:01:54,060 +ou + +315 +00:01:54,060 --> 00:01:54,440 +même + +316 +00:01:54,440 --> 00:01:54,600 +du + +317 +00:01:54,600 --> 00:01:54,980 +téléphone. + +318 +00:01:55,340 --> 00:01:55,720 +Mais + +319 +00:01:55,720 --> 00:01:56,020 +la + +320 +00:01:56,020 --> 00:01:56,400 +dépendance + +321 +00:01:56,400 --> 00:01:56,620 +n'était + +322 +00:01:56,620 --> 00:01:57,160 +pas du + +323 +00:01:57,160 --> 00:01:57,460 +même + +324 +00:01:57,460 --> 00:01:57,780 +ordre. + +325 +00:01:57,780 --> 00:01:57,980 +Donc + +326 +00:01:57,980 --> 00:01:58,340 +le + +327 +00:01:58,340 --> 00:01:58,620 +rejet + +328 +00:01:58,620 --> 00:01:58,780 +non + +329 +00:01:58,780 --> 00:01:58,940 +plus + +330 +00:01:58,940 --> 00:01:59,120 +n'était + +331 +00:01:59,120 --> 00:01:59,380 +pas du + +332 +00:01:59,380 --> 00:01:59,560 +même + +333 +00:01:59,560 --> 00:01:59,840 +ordre. + +334 +00:01:59,980 --> 00:02:00,180 +On + +335 +00:02:00,180 --> 00:02:00,380 +peut + +336 +00:02:00,380 --> 00:02:00,660 +adorer + +337 +00:02:00,660 --> 00:02:00,880 +sa + +338 +00:02:00,880 --> 00:02:01,460 +bagnole, + +339 +00:02:01,460 --> 00:02:01,560 +en + +340 +00:02:01,560 --> 00:02:01,740 +avoir + +341 +00:02:01,740 --> 00:02:02,100 +besoin + +342 +00:02:02,100 --> 00:02:02,340 +pour + +343 +00:02:02,340 --> 00:02:02,680 +plein + +344 +00:02:02,680 --> 00:02:02,800 +de + +345 +00:02:02,800 --> 00:02:03,020 +choses. 
+ +346 +00:02:03,280 --> 00:02:03,460 +Et + +347 +00:02:03,460 --> 00:02:03,860 +bien, + +348 +00:02:03,860 --> 00:02:03,980 +le + +349 +00:02:03,980 --> 00:02:04,680 +soir, + +350 +00:02:04,680 --> 00:02:04,900 +quand + +351 +00:02:04,900 --> 00:02:05,140 +on va + +352 +00:02:05,140 --> 00:02:05,380 +se + +353 +00:02:05,380 --> 00:02:05,800 +coucher, + +354 +00:02:05,800 --> 00:02:06,220 +on la + +355 +00:02:06,220 --> 00:02:06,360 +laisse. + +356 +00:02:06,980 --> 00:02:07,360 +On ne + +357 +00:02:07,360 --> 00:02:07,480 +l'a + +358 +00:02:07,480 --> 00:02:07,680 +pas + +359 +00:02:07,680 --> 00:02:07,800 +dans + +360 +00:02:07,800 --> 00:02:08,060 +la + +361 +00:02:08,060 --> 00:02:08,260 +main + +362 +00:02:08,260 --> 00:02:08,440 +quand + +363 +00:02:08,440 --> 00:02:08,680 +on est + +364 +00:02:08,680 --> 00:02:09,040 +au + +365 +00:02:09,040 --> 00:02:09,140 +lit, + +366 +00:02:09,140 --> 00:02:09,300 +on ne + +367 +00:02:09,300 --> 00:02:09,500 +l'emmène + +368 +00:02:09,500 --> 00:02:09,860 +pas au + +369 +00:02:09,860 --> 00:02:10,480 +chiottes. 
+ +370 +00:02:10,860 --> 00:02:11,040 +On + +371 +00:02:11,040 --> 00:02:11,280 +pouvait + +372 +00:02:11,280 --> 00:02:11,480 +être + +373 +00:02:11,480 --> 00:02:12,220 +énervé + +374 +00:02:12,220 --> 00:02:12,440 +par + +375 +00:02:12,440 --> 00:02:12,700 +son + +376 +00:02:12,700 --> 00:02:13,100 +môme + +377 +00:02:13,100 --> 00:02:13,340 +qui + +378 +00:02:13,340 --> 00:02:13,760 +occupait + +379 +00:02:13,760 --> 00:02:13,800 +la + +380 +00:02:13,800 --> 00:02:14,080 +ligne + +381 +00:02:14,080 --> 00:02:14,140 +de + +382 +00:02:14,140 --> 00:02:14,600 +téléphone + +383 +00:02:14,600 --> 00:02:14,820 +pendant + +384 +00:02:14,820 --> 00:02:15,200 +une + +385 +00:02:15,200 --> 00:02:15,360 +heure + +386 +00:02:15,360 --> 00:02:15,540 +chaque + +387 +00:02:15,540 --> 00:02:15,800 +soir + +388 +00:02:15,800 --> 00:02:15,960 +pour + +389 +00:02:15,960 --> 00:02:16,280 +discuter + +390 +00:02:16,280 --> 00:02:16,480 +avec + +391 +00:02:16,480 --> 00:02:16,600 +un + +392 +00:02:16,600 --> 00:02:16,900 +copain. 
+ +393 +00:02:17,280 --> 00:02:17,460 +Mais + +394 +00:02:17,460 --> 00:02:17,940 +ça ne + +395 +00:02:17,940 --> 00:02:18,400 +ressemblait + +396 +00:02:18,400 --> 00:02:18,940 +pas à + +397 +00:02:18,940 --> 00:02:18,980 +ce + +398 +00:02:18,980 --> 00:02:19,100 +qu'on + +399 +00:02:19,100 --> 00:02:19,480 +peut + +400 +00:02:19,480 --> 00:02:20,120 +ressentir + +401 +00:02:20,120 --> 00:02:20,460 +à voir + +402 +00:02:20,460 --> 00:02:20,680 +ce + +403 +00:02:20,680 --> 00:02:20,920 +même + +404 +00:02:20,920 --> 00:02:21,340 +môme + +405 +00:02:21,340 --> 00:02:21,880 +aujourd'hui, + +406 +00:02:22,140 --> 00:02:22,940 +continuellement + +407 +00:02:22,940 --> 00:02:23,180 +avec + +408 +00:02:23,180 --> 00:02:23,380 +son + +409 +00:02:23,380 --> 00:02:23,760 +smartphone + +410 +00:02:23,760 --> 00:02:23,940 +dans + +411 +00:02:23,940 --> 00:02:24,000 +la + +412 +00:02:24,000 --> 00:02:24,360 +main, + +413 +00:02:24,360 --> 00:02:24,520 +comme + +414 +00:02:24,520 --> 00:02:24,640 +si + +415 +00:02:24,640 --> 00:02:24,820 +c'était + +416 +00:02:24,820 --> 00:02:25,060 +une + +417 +00:02:25,060 --> 00:02:25,220 +sorte + +418 +00:02:25,220 --> 00:02:25,300 +de + +419 +00:02:25,300 --> 00:02:25,820 +pacemaker + +420 +00:02:25,820 --> 00:02:26,300 +externe, + +421 +00:02:26,340 --> 00:02:26,580 +comme + +422 +00:02:26,580 --> 00:02:26,860 +si le + +423 +00:02:26,860 --> 00:02:27,360 +lâcher + +424 +00:02:27,360 --> 00:02:27,560 +allait + +425 +00:02:27,560 --> 00:02:27,860 +entraîner + +426 +00:02:27,860 --> 00:02:28,000 +sa + +427 +00:02:28,000 --> 00:02:28,220 +mort + +428 +00:02:28,220 --> 00:02:28,840 +immédiate. 
+ +429 +00:02:29,040 --> 00:02:29,280 +Bon, + +430 +00:02:29,280 --> 00:02:29,460 +je dis + +431 +00:02:29,460 --> 00:02:29,640 +ça + +432 +00:02:29,640 --> 00:02:29,740 +pour + +433 +00:02:29,740 --> 00:02:29,880 +le + +434 +00:02:29,880 --> 00:02:30,320 +môme, + +435 +00:02:30,320 --> 00:02:30,520 +mais + +436 +00:02:30,520 --> 00:02:30,820 +c'est + +437 +00:02:30,820 --> 00:02:31,140 +évidemment + +438 +00:02:31,140 --> 00:02:31,480 +valable + +439 +00:02:31,480 --> 00:02:31,620 +pour + +440 +00:02:31,620 --> 00:02:31,760 +nous + +441 +00:02:31,760 --> 00:02:31,960 +aussi. + +442 +00:02:32,340 --> 00:02:33,460 +Donc, + +443 +00:02:33,460 --> 00:02:33,660 +rapport + +444 +00:02:33,660 --> 00:02:34,240 +inédit. + +445 +00:02:34,240 --> 00:02:35,480 +D'accord. + +446 +00:02:35,480 --> 00:02:35,820 +Mais + +447 +00:02:35,820 --> 00:02:36,320 +pourquoi + +448 +00:02:36,320 --> 00:02:36,680 +a-t-on + +449 +00:02:36,680 --> 00:02:37,060 +l'impression + +450 +00:02:37,060 --> 00:02:37,260 +qu'on + +451 +00:02:37,260 --> 00:02:37,440 +n'en + +452 +00:02:37,440 --> 00:02:37,900 +sortira + +453 +00:02:37,900 --> 00:02:38,220 +jamais? 
+ +454 +00:02:38,860 --> 00:02:39,340 +Est-ce + +455 +00:02:39,340 --> 00:02:39,460 +qu'il + +456 +00:02:39,460 --> 00:02:39,620 +faut + +457 +00:02:39,620 --> 00:02:39,780 +en + +458 +00:02:39,780 --> 00:02:40,120 +remettre + +459 +00:02:40,120 --> 00:02:40,340 +la + +460 +00:02:40,340 --> 00:02:40,660 +faute + +461 +00:02:40,660 --> 00:02:40,940 +sur + +462 +00:02:40,940 --> 00:02:41,280 +les + +463 +00:02:41,280 --> 00:02:41,440 +gens + +464 +00:02:41,440 --> 00:02:41,580 +qui + +465 +00:02:41,580 --> 00:02:41,720 +ont + +466 +00:02:41,720 --> 00:02:42,300 +créé + +467 +00:02:42,300 --> 00:02:42,460 +cet + +468 +00:02:42,460 --> 00:02:42,780 +outil + +469 +00:02:42,780 --> 00:02:43,340 +merveilleux + +470 +00:02:43,340 --> 00:02:43,500 +et + +471 +00:02:43,500 --> 00:02:43,860 +diabolique, + +472 +00:02:43,860 --> 00:02:43,920 +et + +473 +00:02:43,920 --> 00:02:44,400 +diabolique + +474 +00:02:44,400 --> 00:02:44,660 +parce + +475 +00:02:44,660 --> 00:02:44,840 +que + +476 +00:02:44,840 --> 00:02:45,320 +merveilleux? + +477 +00:02:46,340 --> 00:02:47,040 +Les + +478 +00:02:47,040 --> 00:02:47,480 +économistes + +479 +00:02:47,480 --> 00:02:47,680 +parlent + +480 +00:02:47,680 --> 00:02:47,820 +de + +481 +00:02:47,820 --> 00:02:48,360 +dépendance + +482 +00:02:48,360 --> 00:02:48,500 +du + +483 +00:02:48,500 --> 00:02:48,820 +sentier. 
+ +484 +00:02:48,860 --> 00:02:49,100 +C'est + +485 +00:02:49,100 --> 00:02:49,340 +l'idée + +486 +00:02:49,340 --> 00:02:49,700 +qu'on + +487 +00:02:49,700 --> 00:02:49,880 +est + +488 +00:02:49,880 --> 00:02:50,540 +sur un + +489 +00:02:50,540 --> 00:02:50,780 +sentier + +490 +00:02:50,780 --> 00:02:50,940 +qui a + +491 +00:02:50,940 --> 00:02:51,120 +été + +492 +00:02:51,120 --> 00:02:51,900 +établi, + +493 +00:02:51,900 --> 00:02:52,120 +soit + +494 +00:02:52,120 --> 00:02:52,720 +volontairement + +495 +00:02:52,720 --> 00:02:52,800 +en + +496 +00:02:52,800 --> 00:02:53,060 +marchant + +497 +00:02:53,060 --> 00:02:54,240 +dessus, + +498 +00:02:54,240 --> 00:02:54,920 +soit + +499 +00:02:54,920 --> 00:02:55,360 +en + +500 +00:02:55,360 --> 00:02:55,500 +définissant + +501 +00:02:55,500 --> 00:02:55,760 +des + +502 +00:02:55,760 --> 00:02:56,040 +bornes, + +503 +00:02:56,040 --> 00:02:56,080 +en + +504 +00:02:56,080 --> 00:02:56,580 +définissant + +505 +00:02:56,580 --> 00:02:56,820 +une + +506 +00:02:56,820 --> 00:02:57,420 +signalétique. + diff --git a/tests/expected/split_subtitles/smartphone.mp3_6.vtt b/tests/expected/split_subtitles/smartphone.mp3_6.vtt new file mode 100644 index 0000000000000000000000000000000000000000..61871c111372405e0b3331b5d11e0e1cbd3f450e --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_6.vtt @@ -0,0 +1,1520 @@ +WEBVTT + +00:00.380 --> 00:00.580 +C'est + +00:00.580 --> 00:00.880 +évident + +00:00.880 --> 00:01.080 +ce que + +00:01.080 --> 00:01.200 +dit + +00:01.200 --> 00:01.780 +Nicolas, + +00:01.780 --> 00:01.900 +mais + +00:01.900 --> 00:02.340 +je ne + +00:02.340 --> 00:02.380 +me + +00:02.380 --> 00:02.580 +l'étais + +00:02.580 --> 00:02.840 +jamais + +00:02.840 --> 00:03.260 +formulé + +00:03.260 --> 00:03.420 +comme + +00:03.420 --> 00:03.620 +ça. 
+ +00:04.080 --> 00:04.340 +Ce qui + +00:04.340 --> 00:04.480 +fait + +00:04.480 --> 00:04.660 +la + +00:04.660 --> 00:05.000 +force + +00:05.000 --> 00:05.200 +du + +00:05.200 --> 00:05.880 +smartphone, + +00:05.880 --> 00:06.120 +c'est + +00:06.120 --> 00:06.260 +pas + +00:06.260 --> 00:06.520 +seulement + +00:06.520 --> 00:07.380 +l'accumulation + +00:07.380 --> 00:07.560 +des + +00:07.560 --> 00:07.920 +fonctions, + +00:08.320 --> 00:08.440 +mais + +00:08.440 --> 00:08.600 +la + +00:08.600 --> 00:08.900 +manière + +00:08.900 --> 00:09.100 +dont + +00:09.100 --> 00:09.480 +elles + +00:09.480 --> 00:10.320 +interagissent + +00:10.320 --> 00:10.580 +entre + +00:10.580 --> 00:10.880 +elles. + +00:10.960 --> 00:11.220 +Ce qui + +00:11.220 --> 00:11.400 +dit + +00:11.400 --> 00:11.560 +d'ailleurs + +00:11.560 --> 00:11.780 +sur la + +00:11.780 --> 00:12.120 +photo, + +00:12.120 --> 00:12.200 +c'est + +00:12.200 --> 00:12.420 +hyper + +00:12.420 --> 00:13.000 +convaincant. + +00:13.340 --> 00:13.620 +Alors + +00:13.620 --> 00:14.340 +évidemment, + +00:14.340 --> 00:14.380 +il + +00:14.380 --> 00:14.740 +faudrait + +00:14.740 --> 00:15.160 +ajouter + +00:15.160 --> 00:15.520 +les + +00:15.520 --> 00:16.020 +interfaces. + +00:16.220 --> 00:16.700 +L'écran + +00:16.700 --> 00:17.060 +tactile + +00:17.060 --> 00:17.880 +a été + +00:17.880 --> 00:18.280 +beaucoup + +00:18.280 --> 00:18.620 +très + +00:18.620 --> 00:18.900 +souvent + +00:18.900 --> 00:19.360 +mentionné. 
+ +00:19.840 --> 00:20.220 +Mais + +00:20.220 --> 00:20.520 +bon, + +00:20.520 --> 00:20.600 +il + +00:20.600 --> 00:20.700 +faut + +00:20.700 --> 00:20.840 +dire + +00:20.840 --> 00:20.960 +qu'il + +00:20.960 --> 00:21.260 +profite + +00:21.260 --> 00:21.680 +aussi + +00:21.680 --> 00:22.100 +de 20 + +00:22.100 --> 00:22.320 +ans + +00:22.320 --> 00:22.480 +pendant + +00:22.480 --> 00:22.920 +lesquels + +00:22.920 --> 00:23.040 +les + +00:23.040 --> 00:23.540 +ordinateurs + +00:23.540 --> 00:23.720 +nous + +00:23.720 --> 00:23.820 +ont + +00:23.820 --> 00:24.100 +appris + +00:24.100 --> 00:24.240 +à + +00:24.240 --> 00:24.500 +cliquer + +00:24.500 --> 00:24.660 +sur + +00:24.660 --> 00:24.940 +des + +00:24.940 --> 00:25.260 +icônes. + +00:25.420 --> 00:25.760 +Sauf + +00:25.760 --> 00:26.660 +que le + +00:26.660 --> 00:27.060 +smartphone + +00:27.060 --> 00:27.440 +ajoute + +00:27.440 --> 00:27.620 +le + +00:27.620 --> 00:28.180 +toucher, + +00:28.180 --> 00:28.280 +ce qui + +00:28.280 --> 00:28.480 +rend + +00:28.480 --> 00:28.680 +le + +00:28.680 --> 00:29.100 +contact + +00:29.100 --> 00:29.460 +plus + +00:29.460 --> 00:30.220 +direct, + +00:30.220 --> 00:30.260 +plus + +00:30.260 --> 00:30.640 +sensible. 
+ +00:31.040 --> 00:31.220 +Et + +00:31.220 --> 00:31.360 +puis + +00:31.360 --> 00:31.700 +évidemment, + +00:31.700 --> 00:31.740 +il + +00:31.740 --> 00:31.940 +faudrait + +00:31.940 --> 00:32.120 +parler + +00:32.120 --> 00:32.340 +aussi + +00:32.340 --> 00:32.480 +des + +00:32.480 --> 00:32.900 +applications + +00:32.900 --> 00:33.180 +qui + +00:33.180 --> 00:33.740 +permettent + +00:33.740 --> 00:33.960 +de + +00:33.960 --> 00:34.420 +contourner + +00:34.420 --> 00:34.520 +le + +00:34.520 --> 00:34.800 +côté + +00:34.800 --> 00:35.320 +touffu + +00:35.320 --> 00:35.780 +de la + +00:35.780 --> 00:36.240 +navigation + +00:36.240 --> 00:36.600 +web + +00:36.600 --> 00:36.780 +pour + +00:36.780 --> 00:36.980 +aller + +00:36.980 --> 00:37.520 +directement + +00:37.520 --> 00:37.680 +au + +00:37.680 --> 00:37.820 +but. + +00:37.820 --> 00:38.760 +Bref, + +00:38.760 --> 00:38.980 +tout + +00:38.980 --> 00:39.420 +ça, + +00:39.420 --> 00:39.880 +ce sont + +00:39.880 --> 00:40.160 +les + +00:40.160 --> 00:40.680 +conditions + +00:40.680 --> 00:40.960 +qui + +00:40.960 --> 00:41.460 +permettent + +00:41.460 --> 00:41.600 +de + +00:41.600 --> 00:42.060 +créer + +00:42.060 --> 00:42.380 +cet + +00:42.380 --> 00:42.600 +objet + +00:42.600 --> 00:42.800 +dont + +00:42.800 --> 00:43.260 +Nicolas + +00:43.260 --> 00:43.500 +dit + +00:43.500 --> 00:43.700 +qu'il + +00:43.700 --> 00:43.880 +est + +00:43.880 --> 00:44.980 +vraisemblablement + +00:44.980 --> 00:45.380 +inédit + +00:45.380 --> 00:45.700 +dans + +00:45.700 --> 00:45.980 +l'histoire + +00:45.980 --> 00:46.180 +de + +00:46.180 --> 00:46.580 +l'humanité. + +00:46.600 --> 00:47.240 +Mais + +00:47.240 --> 00:47.720 +ça, + +00:47.720 --> 00:47.840 +ça soulève + +00:47.840 --> 00:48.020 +une + +00:48.020 --> 00:48.260 +autre + +00:48.260 --> 00:48.820 +interrogation. 
+ +00:49.220 --> 00:49.620 +Est-ce + +00:49.620 --> 00:49.820 +que le + +00:49.820 --> 00:49.980 +fait + +00:49.980 --> 00:50.140 +que + +00:50.140 --> 00:50.320 +cet + +00:50.320 --> 00:50.660 +objet + +00:50.660 --> 00:51.120 +soit + +00:51.120 --> 00:51.800 +inédit + +00:51.800 --> 00:52.320 +induit + +00:52.320 --> 00:52.420 +que + +00:52.420 --> 00:52.720 +notre + +00:52.720 --> 00:53.280 +rapport + +00:53.280 --> 00:53.660 +à lui + +00:53.660 --> 00:54.020 +est + +00:54.020 --> 00:54.540 +aussi + +00:54.540 --> 00:54.700 +un + +00:54.700 --> 00:55.000 +rapport + +00:55.000 --> 00:55.460 +inédit? + +00:55.460 --> 00:55.900 +Je + +00:55.900 --> 00:56.000 +veux + +00:56.000 --> 00:56.240 +dire, + +00:56.240 --> 00:56.360 +est-ce + +00:56.360 --> 00:56.580 +que le + +00:56.580 --> 00:56.880 +rapport + +00:56.880 --> 00:57.040 +qu'on + +00:57.040 --> 00:57.280 +a au + +00:57.280 --> 00:57.600 +smartphone + +00:57.600 --> 00:57.920 +est + +00:57.920 --> 00:58.240 +comparable + +00:58.240 --> 00:58.480 +à + +00:58.480 --> 00:58.660 +celui + +00:58.660 --> 00:58.900 +qu'on + +00:58.900 --> 00:59.320 +entretenait + +00:59.320 --> 00:59.460 +à + +00:59.460 --> 00:59.700 +d'autres + +00:59.700 --> 00:59.960 +objets + +00:59.960 --> 01:00.460 +techniques + +01:00.460 --> 01:00.880 +comme + +01:00.880 --> 01:01.500 +la + +01:01.500 --> 01:02.060 +voiture + +01:02.060 --> 01:02.680 +ou le + +01:02.680 --> 01:03.120 +téléphone? + +01:03.360 --> 01:05.480 +Il n'y + +01:05.480 --> 01:05.660 +a pas + +01:05.660 --> 01:06.220 +d'équivalent + +01:06.220 --> 01:06.420 +en + +01:06.420 --> 01:06.660 +fait. 
+ +01:06.880 --> 01:06.980 +Et + +01:06.980 --> 01:07.080 +donc + +01:07.080 --> 01:07.280 +cette + +01:07.280 --> 01:07.540 +espèce + +01:07.540 --> 01:07.680 +de + +01:07.680 --> 01:08.480 +nouveauté + +01:08.480 --> 01:08.660 +dans + +01:08.660 --> 01:08.940 +la + +01:08.940 --> 01:09.220 +relation + +01:09.220 --> 01:09.380 +à + +01:09.380 --> 01:10.240 +l'objet, + +01:10.240 --> 01:10.380 +c'est + +01:10.380 --> 01:10.640 +fascinant + +01:10.640 --> 01:10.760 +et + +01:10.760 --> 01:11.520 +terrifiant. + +01:11.620 --> 01:11.860 +Parce + +01:11.860 --> 01:12.120 +qu'on + +01:12.120 --> 01:12.440 +a + +01:12.440 --> 01:13.560 +l'impression, + +01:13.560 --> 01:13.840 +comme + +01:13.840 --> 01:14.000 +le + +01:14.000 --> 01:14.200 +disent + +01:14.200 --> 01:14.400 +les + +01:14.400 --> 01:14.840 +utilisateurs + +01:14.840 --> 01:15.000 +et les + +01:15.000 --> 01:15.220 +services, + +01:15.220 --> 01:15.420 +d'être + +01:15.420 --> 01:15.960 +dépendants + +01:15.960 --> 01:16.260 +de cet + +01:16.260 --> 01:16.480 +objet, + +01:16.860 --> 01:17.080 +d'induire + +01:17.080 --> 01:17.240 +en + +01:17.240 --> 01:17.340 +fait + +01:17.340 --> 01:17.520 +une + +01:17.520 --> 01:17.880 +espèce + +01:17.880 --> 01:18.480 +de + +01:18.480 --> 01:18.600 +relation, + +01:18.600 --> 01:18.940 +de + +01:18.940 --> 01:19.520 +médiation + +01:19.520 --> 01:19.740 +avec + +01:19.740 --> 01:19.920 +le + +01:19.920 --> 01:20.640 +monde + +01:20.640 --> 01:21.100 +qui + +01:21.100 --> 01:21.640 +rend + +01:21.640 --> 01:21.780 +de + +01:21.780 --> 01:22.020 +l'ampleur + +01:22.020 --> 01:22.240 +et qui + +01:22.240 --> 01:22.360 +amène + +01:22.360 --> 01:22.560 +aussi + +01:22.560 --> 01:22.720 +à des + +01:22.720 --> 01:22.900 +formes + +01:22.900 --> 01:23.020 +de + +01:23.020 --> 01:23.260 +rejet. 
+ +01:23.940 --> 01:24.940 +Donc, + +01:24.940 --> 01:24.980 +à + +01:24.980 --> 01:25.360 +objet + +01:25.360 --> 01:26.560 +inédit, + +01:26.560 --> 01:27.000 +rapport + +01:27.000 --> 01:27.800 +inédit. + +01:28.020 --> 01:28.860 +Et ce + +01:28.860 --> 01:29.280 +rapport, + +01:29.280 --> 01:29.560 +si + +01:29.560 --> 01:29.840 +j'en + +01:29.840 --> 01:29.880 +crois + +01:29.880 --> 01:30.540 +Nicolas, + +01:30.540 --> 01:30.940 +serait + +01:30.940 --> 01:31.800 +caractérisé + +01:31.800 --> 01:32.440 +par un + +01:32.440 --> 01:32.980 +mélange + +01:32.980 --> 01:33.400 +de + +01:33.400 --> 01:34.240 +dépendance + +01:34.240 --> 01:34.680 +et de + +01:34.680 --> 01:35.140 +rejet. + +01:35.780 --> 01:36.380 +Bon, + +01:36.380 --> 01:36.520 +en + +01:36.520 --> 01:37.140 +vrai, + +01:37.140 --> 01:37.180 +il + +01:37.180 --> 01:37.580 +faudrait + +01:37.580 --> 01:38.080 +remonter + +01:38.080 --> 01:38.580 +très + +01:38.580 --> 01:38.700 +très + +01:38.700 --> 01:39.320 +finement + +01:39.320 --> 01:39.700 +toute + +01:39.700 --> 01:40.060 +l'histoire + +01:40.060 --> 01:40.240 +des + +01:40.240 --> 01:40.480 +objets + +01:40.480 --> 01:41.020 +techniques + +01:41.020 --> 01:41.680 +et de + +01:41.680 --> 01:41.840 +leur + +01:41.840 --> 01:42.320 +insertion + +01:42.320 --> 01:42.480 +dans + +01:42.480 --> 01:42.660 +nos + +01:42.660 --> 01:42.860 +vies + +01:42.900 --> 01:43.060 +pour + +01:43.060 --> 01:43.660 +déterminer + +01:43.660 --> 01:43.940 +si ce + +01:43.940 --> 01:44.260 +rapport + +01:44.260 --> 01:44.740 +est + +01:44.740 --> 01:45.300 +totalement + +01:45.300 --> 01:45.740 +inédit. + +01:46.100 --> 01:46.360 +Mais + +01:46.360 --> 01:46.920 +j'ai + +01:46.920 --> 01:47.360 +l'impression + +01:47.360 --> 01:47.560 +comme + +01:47.560 --> 01:47.960 +ça que + +01:47.960 --> 01:48.460 +Nicolas + +01:48.460 --> 01:48.700 +ne se + +01:48.700 --> 01:48.880 +trompe + +01:48.880 --> 01:49.080 +pas + +01:49.080 --> 01:49.340 +vraiment. 
+ +01:49.880 --> 01:50.080 +Pour + +01:50.080 --> 01:50.240 +autant + +01:50.240 --> 01:50.520 +que je + +01:50.520 --> 01:51.140 +sache, + +01:51.140 --> 01:51.360 +il y a + +01:51.360 --> 01:51.680 +eu + +01:51.680 --> 01:51.880 +plein + +01:51.880 --> 01:52.060 +de + +01:52.060 --> 01:52.600 +discussions + +01:52.600 --> 01:52.940 +autour + +01:52.940 --> 01:53.520 +de la + +01:53.520 --> 01:53.860 +voiture + +01:53.860 --> 01:54.060 +ou + +01:54.060 --> 01:54.440 +même + +01:54.440 --> 01:54.600 +du + +01:54.600 --> 01:54.980 +téléphone. + +01:55.340 --> 01:55.720 +Mais + +01:55.720 --> 01:56.020 +la + +01:56.020 --> 01:56.400 +dépendance + +01:56.400 --> 01:56.620 +n'était + +01:56.620 --> 01:57.160 +pas du + +01:57.160 --> 01:57.460 +même + +01:57.460 --> 01:57.780 +ordre. + +01:57.780 --> 01:57.980 +Donc + +01:57.980 --> 01:58.340 +le + +01:58.340 --> 01:58.620 +rejet + +01:58.620 --> 01:58.780 +non + +01:58.780 --> 01:58.940 +plus + +01:58.940 --> 01:59.120 +n'était + +01:59.120 --> 01:59.380 +pas du + +01:59.380 --> 01:59.560 +même + +01:59.560 --> 01:59.840 +ordre. + +01:59.980 --> 02:00.180 +On + +02:00.180 --> 02:00.380 +peut + +02:00.380 --> 02:00.660 +adorer + +02:00.660 --> 02:00.880 +sa + +02:00.880 --> 02:01.460 +bagnole, + +02:01.460 --> 02:01.560 +en + +02:01.560 --> 02:01.740 +avoir + +02:01.740 --> 02:02.100 +besoin + +02:02.100 --> 02:02.340 +pour + +02:02.340 --> 02:02.680 +plein + +02:02.680 --> 02:02.800 +de + +02:02.800 --> 02:03.020 +choses. + +02:03.280 --> 02:03.460 +Et + +02:03.460 --> 02:03.860 +bien, + +02:03.860 --> 02:03.980 +le + +02:03.980 --> 02:04.680 +soir, + +02:04.680 --> 02:04.900 +quand + +02:04.900 --> 02:05.140 +on va + +02:05.140 --> 02:05.380 +se + +02:05.380 --> 02:05.800 +coucher, + +02:05.800 --> 02:06.220 +on la + +02:06.220 --> 02:06.360 +laisse. 
+ +02:06.980 --> 02:07.360 +On ne + +02:07.360 --> 02:07.480 +l'a + +02:07.480 --> 02:07.680 +pas + +02:07.680 --> 02:07.800 +dans + +02:07.800 --> 02:08.060 +la + +02:08.060 --> 02:08.260 +main + +02:08.260 --> 02:08.440 +quand + +02:08.440 --> 02:08.680 +on est + +02:08.680 --> 02:09.040 +au + +02:09.040 --> 02:09.140 +lit, + +02:09.140 --> 02:09.300 +on ne + +02:09.300 --> 02:09.500 +l'emmène + +02:09.500 --> 02:09.860 +pas au + +02:09.860 --> 02:10.480 +chiottes. + +02:10.860 --> 02:11.040 +On + +02:11.040 --> 02:11.280 +pouvait + +02:11.280 --> 02:11.480 +être + +02:11.480 --> 02:12.220 +énervé + +02:12.220 --> 02:12.440 +par + +02:12.440 --> 02:12.700 +son + +02:12.700 --> 02:13.100 +môme + +02:13.100 --> 02:13.340 +qui + +02:13.340 --> 02:13.760 +occupait + +02:13.760 --> 02:13.800 +la + +02:13.800 --> 02:14.080 +ligne + +02:14.080 --> 02:14.140 +de + +02:14.140 --> 02:14.600 +téléphone + +02:14.600 --> 02:14.820 +pendant + +02:14.820 --> 02:15.200 +une + +02:15.200 --> 02:15.360 +heure + +02:15.360 --> 02:15.540 +chaque + +02:15.540 --> 02:15.800 +soir + +02:15.800 --> 02:15.960 +pour + +02:15.960 --> 02:16.280 +discuter + +02:16.280 --> 02:16.480 +avec + +02:16.480 --> 02:16.600 +un + +02:16.600 --> 02:16.900 +copain. 
+ +02:17.280 --> 02:17.460 +Mais + +02:17.460 --> 02:17.940 +ça ne + +02:17.940 --> 02:18.400 +ressemblait + +02:18.400 --> 02:18.940 +pas à + +02:18.940 --> 02:18.980 +ce + +02:18.980 --> 02:19.100 +qu'on + +02:19.100 --> 02:19.480 +peut + +02:19.480 --> 02:20.120 +ressentir + +02:20.120 --> 02:20.460 +à voir + +02:20.460 --> 02:20.680 +ce + +02:20.680 --> 02:20.920 +même + +02:20.920 --> 02:21.340 +môme + +02:21.340 --> 02:21.880 +aujourd'hui, + +02:22.140 --> 02:22.940 +continuellement + +02:22.940 --> 02:23.180 +avec + +02:23.180 --> 02:23.380 +son + +02:23.380 --> 02:23.760 +smartphone + +02:23.760 --> 02:23.940 +dans + +02:23.940 --> 02:24.000 +la + +02:24.000 --> 02:24.360 +main, + +02:24.360 --> 02:24.520 +comme + +02:24.520 --> 02:24.640 +si + +02:24.640 --> 02:24.820 +c'était + +02:24.820 --> 02:25.060 +une + +02:25.060 --> 02:25.220 +sorte + +02:25.220 --> 02:25.300 +de + +02:25.300 --> 02:25.820 +pacemaker + +02:25.820 --> 02:26.300 +externe, + +02:26.340 --> 02:26.580 +comme + +02:26.580 --> 02:26.860 +si le + +02:26.860 --> 02:27.360 +lâcher + +02:27.360 --> 02:27.560 +allait + +02:27.560 --> 02:27.860 +entraîner + +02:27.860 --> 02:28.000 +sa + +02:28.000 --> 02:28.220 +mort + +02:28.220 --> 02:28.840 +immédiate. + +02:29.040 --> 02:29.280 +Bon, + +02:29.280 --> 02:29.460 +je dis + +02:29.460 --> 02:29.640 +ça + +02:29.640 --> 02:29.740 +pour + +02:29.740 --> 02:29.880 +le + +02:29.880 --> 02:30.320 +môme, + +02:30.320 --> 02:30.520 +mais + +02:30.520 --> 02:30.820 +c'est + +02:30.820 --> 02:31.140 +évidemment + +02:31.140 --> 02:31.480 +valable + +02:31.480 --> 02:31.620 +pour + +02:31.620 --> 02:31.760 +nous + +02:31.760 --> 02:31.960 +aussi. + +02:32.340 --> 02:33.460 +Donc, + +02:33.460 --> 02:33.660 +rapport + +02:33.660 --> 02:34.240 +inédit. + +02:34.240 --> 02:35.480 +D'accord. 
+ +02:35.480 --> 02:35.820 +Mais + +02:35.820 --> 02:36.320 +pourquoi + +02:36.320 --> 02:36.680 +a-t-on + +02:36.680 --> 02:37.060 +l'impression + +02:37.060 --> 02:37.260 +qu'on + +02:37.260 --> 02:37.440 +n'en + +02:37.440 --> 02:37.900 +sortira + +02:37.900 --> 02:38.220 +jamais? + +02:38.860 --> 02:39.340 +Est-ce + +02:39.340 --> 02:39.460 +qu'il + +02:39.460 --> 02:39.620 +faut + +02:39.620 --> 02:39.780 +en + +02:39.780 --> 02:40.120 +remettre + +02:40.120 --> 02:40.340 +la + +02:40.340 --> 02:40.660 +faute + +02:40.660 --> 02:40.940 +sur + +02:40.940 --> 02:41.280 +les + +02:41.280 --> 02:41.440 +gens + +02:41.440 --> 02:41.580 +qui + +02:41.580 --> 02:41.720 +ont + +02:41.720 --> 02:42.300 +créé + +02:42.300 --> 02:42.460 +cet + +02:42.460 --> 02:42.780 +outil + +02:42.780 --> 02:43.340 +merveilleux + +02:43.340 --> 02:43.500 +et + +02:43.500 --> 02:43.860 +diabolique, + +02:43.860 --> 02:43.920 +et + +02:43.920 --> 02:44.400 +diabolique + +02:44.400 --> 02:44.660 +parce + +02:44.660 --> 02:44.840 +que + +02:44.840 --> 02:45.320 +merveilleux? + +02:46.340 --> 02:47.040 +Les + +02:47.040 --> 02:47.480 +économistes + +02:47.480 --> 02:47.680 +parlent + +02:47.680 --> 02:47.820 +de + +02:47.820 --> 02:48.360 +dépendance + +02:48.360 --> 02:48.500 +du + +02:48.500 --> 02:48.820 +sentier. 
+ +02:48.860 --> 02:49.100 +C'est + +02:49.100 --> 02:49.340 +l'idée + +02:49.340 --> 02:49.700 +qu'on + +02:49.700 --> 02:49.880 +est + +02:49.880 --> 02:50.540 +sur un + +02:50.540 --> 02:50.780 +sentier + +02:50.780 --> 02:50.940 +qui a + +02:50.940 --> 02:51.120 +été + +02:51.120 --> 02:51.900 +établi, + +02:51.900 --> 02:52.120 +soit + +02:52.120 --> 02:52.720 +volontairement + +02:52.720 --> 02:52.800 +en + +02:52.800 --> 02:53.060 +marchant + +02:53.060 --> 02:54.240 +dessus, + +02:54.240 --> 02:54.920 +soit + +02:54.920 --> 02:55.360 +en + +02:55.360 --> 02:55.500 +définissant + +02:55.500 --> 02:55.760 +des + +02:55.760 --> 02:56.040 +bornes, + +02:56.040 --> 02:56.080 +en + +02:56.080 --> 02:56.580 +définissant + +02:56.580 --> 02:56.820 +une + +02:56.820 --> 02:57.420 +signalétique. + diff --git a/tests/expected/tiny.en.cpu/nocond_bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny.en.cpu/nocond_bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..be3e848ef767849ee49d451215e9eb018ec0f049 --- /dev/null +++ b/tests/expected/tiny.en.cpu/nocond_bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,204 @@ +{ + "text": " Mohoo! Let's go with it again! Mohoo! Mohoo! Mohoo! Let's go with it again!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.44, + "end": 2.98, + "text": " Mohoo! 
Let's go with it again!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50513 + ], + "temperature": 0.0, + "avg_logprob": -0.8020243644714355, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.11818066984415054, + "confidence": 0.496, + "words": [ + { + "text": "Mohoo!", + "start": 0.44, + "end": 1.72, + "confidence": 0.271 + }, + { + "text": "Let's", + "start": 1.72, + "end": 2.04, + "confidence": 0.678 + }, + { + "text": "go", + "start": 2.04, + "end": 2.18, + "confidence": 0.914 + }, + { + "text": "with", + "start": 2.18, + "end": 2.38, + "confidence": 0.234 + }, + { + "text": "it", + "start": 2.38, + "end": 2.44, + "confidence": 0.933 + }, + { + "text": "again!", + "start": 2.44, + "end": 2.98, + "confidence": 0.996 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 30.06, + "end": 31.25, + "text": " Mohoo!", + "tokens": [ + 337, + 1219, + 2238, + 0 + ], + "temperature": 0.0, + "avg_logprob": -0.8320662379264832, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.15341022610664368, + "confidence": 0.308, + "words": [ + { + "text": "Mohoo!", + "start": 30.06, + "end": 31.25, + "confidence": 0.308 + } + ] + }, + { + "id": 2, + "seek": 3100, + "start": 31.25, + "end": 32.48, + "text": " Mohoo!", + "tokens": [ + 337, + 1219, + 2238, + 0 + ], + "temperature": 0.0, + "avg_logprob": -0.767271101474762, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.1705959141254425, + "confidence": 0.329, + "words": [ + { + "text": "Mohoo!", + "start": 31.25, + "end": 32.48, + "confidence": 0.329 + } + ] + }, + { + "id": 3, + "seek": 3200, + "start": 32.98, + "end": 33.8, + "text": " Mohoo!", + "tokens": [ + 337, + 1219, + 2238, + 0 + ], + "temperature": 0.0, + "avg_logprob": -0.5514491200447083, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.1296440064907074, + "confidence": 0.338, + "words": [ + { + "text": "Mohoo!", + "start": 32.98, + "end": 33.8, + 
"confidence": 0.338 + } + ] + }, + { + "id": 4, + "seek": 3400, + "start": 34.4, + "end": 35.48, + "text": " Let's go with it again!", + "tokens": [ + 50363, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50463 + ], + "temperature": 0.0, + "avg_logprob": -0.3764993667602539, + "compression_ratio": 0.7419354838709677, + "no_speech_prob": 0.05320969223976135, + "confidence": 0.815, + "words": [ + { + "text": "Let's", + "start": 34.4, + "end": 34.58, + "confidence": 0.862 + }, + { + "text": "go", + "start": 34.58, + "end": 34.76, + "confidence": 0.923 + }, + { + "text": "with", + "start": 34.76, + "end": 34.96, + "confidence": 0.627 + }, + { + "text": "it", + "start": 34.96, + "end": 35.0, + "confidence": 0.686 + }, + { + "text": "again!", + "start": 35.0, + "end": 35.48, + "confidence": 0.992 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny.en/accurate_bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny.en/accurate_bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..16f2279e1a44f137f9257956e4defbea566c9b11 --- /dev/null +++ b/tests/expected/tiny.en/accurate_bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,134 @@ +{ + "text": " Mohoo, let's go with it again Mohoo, let's go with it again", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.84, + "text": " Mohoo, let's go with it again", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 11, + 1309, + 338, + 467, + 351, + 340, + 757, + 50501 + ], + "temperature": 0.0, + "avg_logprob": -0.7691173553466797, + "compression_ratio": 0.7837837837837838, + "no_speech_prob": 0.11803468316793442, + "confidence": 0.485, + "words": [ + { + "text": "Mohoo,", + "start": 0.42, + "end": 1.38, + "confidence": 0.244 + }, + { + "text": "let's", + "start": 1.92, + "end": 2.2, + "confidence": 0.91 + }, + { + "text": "go", + "start": 2.2, + "end": 2.3, + "confidence": 0.908 + }, + { + "text": "with", + 
"start": 2.3, + "end": 2.46, + "confidence": 0.287 + }, + { + "text": "it", + "start": 2.46, + "end": 2.56, + "confidence": 0.939 + }, + { + "text": "again", + "start": 2.56, + "end": 2.84, + "confidence": 0.997 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 32.86, + "end": 35.42, + "text": " Mohoo, let's go with it again", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 11, + 1309, + 338, + 467, + 351, + 340, + 757, + 50627 + ], + "temperature": 0.0, + "avg_logprob": -0.4134977780855619, + "compression_ratio": 0.7837837837837838, + "no_speech_prob": 0.3501730263233185, + "confidence": 0.544, + "words": [ + { + "text": "Mohoo,", + "start": 32.86, + "end": 34.02, + "confidence": 0.277 + }, + { + "text": "let's", + "start": 34.46, + "end": 34.74, + "confidence": 0.89 + }, + { + "text": "go", + "start": 34.74, + "end": 34.86, + "confidence": 0.936 + }, + { + "text": "with", + "start": 34.86, + "end": 34.98, + "confidence": 0.658 + }, + { + "text": "it", + "start": 34.98, + "end": 35.1, + "confidence": 0.79 + }, + { + "text": "again", + "start": 35.1, + "end": 35.42, + "confidence": 0.997 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny.en/efficient_bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny.en/efficient_bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..2ca37d298cbae433aebd2f085f0297b2ad9e522a --- /dev/null +++ b/tests/expected/tiny.en/efficient_bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,136 @@ +{ + "text": " Mohoo! Let's go with it again! Mohoo! Let's go with it again!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.8, + "text": " Mohoo! 
Let's go with it again!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50513 + ], + "temperature": 0.0, + "avg_logprob": -0.8018474578857422, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.11800757050514221, + "confidence": 0.496, + "words": [ + { + "text": "Mohoo!", + "start": 0.42, + "end": 1.36, + "confidence": 0.271 + }, + { + "text": "Let's", + "start": 1.9, + "end": 2.18, + "confidence": 0.677 + }, + { + "text": "go", + "start": 2.18, + "end": 2.32, + "confidence": 0.915 + }, + { + "text": "with", + "start": 2.32, + "end": 2.46, + "confidence": 0.233 + }, + { + "text": "it", + "start": 2.46, + "end": 2.56, + "confidence": 0.933 + }, + { + "text": "again!", + "start": 2.56, + "end": 2.8, + "confidence": 0.996 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 32.98, + "end": 35.44, + "text": " Mohoo! Let's go with it again!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50663 + ], + "temperature": 0.0, + "avg_logprob": -0.19353563444955008, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.4543602466583252, + "confidence": 0.952, + "words": [ + { + "text": "Mohoo!", + "start": 32.98, + "end": 34.02, + "confidence": 0.896 + }, + { + "text": "Let's", + "start": 34.44, + "end": 34.72, + "confidence": 0.961 + }, + { + "text": "go", + "start": 34.72, + "end": 34.84, + "confidence": 0.99 + }, + { + "text": "with", + "start": 34.84, + "end": 35.0, + "confidence": 0.985 + }, + { + "text": "it", + "start": 35.0, + "end": 35.1, + "confidence": 0.991 + }, + { + "text": "again!", + "start": 35.1, + "end": 35.44, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny.en/nocond_bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny.en/nocond_bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..9fc6eebe7ed936f6820728b331559b82c555af79 --- /dev/null +++ b/tests/expected/tiny.en/nocond_bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,154 @@ +{ + "text": " Mohoo! Let's go with it again! Mohoo! Let's go with it again!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.8, + "text": " Mohoo! Let's go with it again!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50513 + ], + "temperature": 0.0, + "avg_logprob": -0.8018474578857422, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.11800757050514221, + "confidence": 0.496, + "words": [ + { + "text": "Mohoo!", + "start": 0.42, + "end": 1.36, + "confidence": 0.271 + }, + { + "text": "Let's", + "start": 1.9, + "end": 2.18, + "confidence": 0.677 + }, + { + "text": "go", + "start": 2.18, + "end": 2.32, + "confidence": 0.915 + }, + { + "text": "with", + "start": 2.32, + "end": 2.46, + "confidence": 0.233 + }, + { + "text": "it", + "start": 2.46, + "end": 2.56, + "confidence": 0.933 + }, + { + "text": "again!", + "start": 2.56, + "end": 2.8, + "confidence": 0.996 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 30.0, + "end": 30.06, + "text": " Mohoo!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 50413 + ], + "temperature": 0.0, + "avg_logprob": -0.8362894058227539, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.13507936894893646, + "confidence": 0.306, + "words": [ + { + "text": "Mohoo!", + "start": 30.0, + "end": 30.06, + "confidence": 0.306 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 34.38, + "end": 35.36, + "text": " Let's go with it again!", + "tokens": [ + 50413, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50613 + ], + "temperature": 0.0, + "avg_logprob": -0.8362894058227539, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.13507936894893646, + "confidence": 0.847, + "words": [ + { + "text": "Let's", + "start": 
34.38, + "end": 34.74, + "confidence": 0.876 + }, + { + "text": "go", + "start": 34.74, + "end": 34.86, + "confidence": 0.959 + }, + { + "text": "with", + "start": 34.86, + "end": 34.98, + "confidence": 0.619 + }, + { + "text": "it", + "start": 34.98, + "end": 35.1, + "confidence": 0.813 + }, + { + "text": "again!", + "start": 35.1, + "end": 35.36, + "confidence": 0.997 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto.cpu/gaenswein15.mp3.words.json b/tests/expected/tiny_auto.cpu/gaenswein15.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..2fa1b2d83ea55923062a1564d4750ee40a08e1b9 --- /dev/null +++ b/tests/expected/tiny_auto.cpu/gaenswein15.mp3.words.json @@ -0,0 +1,318 @@ +{ + "text": " Wie wieder zu dazu ist Meshfuchs von 1962 als Meshale für die außerordentliche Form des grullischen Rätos ist dann nicht so weitergegangen wie sich Papsbelle dick das gewünscht hatte. Das hat er als Meshale im Rätos.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.9, + "end": 12.76, + "text": " Wie wieder zu dazu ist Meshfuchs von 1962 als Meshale für die außerordentliche Form des grullischen Rätos ist dann nicht so weitergegangen wie sich Papsbelle dick das gewünscht hatte.", + "tokens": [ + 9233, + 6216, + 2164, + 13034, + 1418, + 376, + 14935, + 69, + 37503, + 2957, + 39498, + 3907, + 376, + 14935, + 1220, + 2959, + 978, + 39428, + 765, + 7698, + 68, + 10126, + 730, + 677, + 858, + 6282, + 497, + 3628, + 329, + 1418, + 3594, + 1979, + 370, + 8988, + 432, + 47152, + 3355, + 3041, + 430, + 2382, + 65, + 4434, + 18659, + 1482, + 6906, + 3412, + 82, + 4701, + 13299, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.6555103008563702, + "compression_ratio": 1.2619047619047619, + "no_speech_prob": 0.11051499098539352, + "confidence": 0.541, + "words": [ + { + "text": "Wie", + "start": 0.9, + "end": 1.06, + "confidence": 0.478 + }, + { + "text": "wieder", + "start": 1.06, + "end": 1.32, 
+ "confidence": 0.901 + }, + { + "text": "zu", + "start": 1.32, + "end": 1.54, + "confidence": 0.249 + }, + { + "text": "dazu", + "start": 1.54, + "end": 1.86, + "confidence": 0.131 + }, + { + "text": "ist", + "start": 1.86, + "end": 2.12, + "confidence": 0.586 + }, + { + "text": "Meshfuchs", + "start": 2.12, + "end": 2.72, + "confidence": 0.332 + }, + { + "text": "von", + "start": 2.72, + "end": 2.94, + "confidence": 0.626 + }, + { + "text": "1962", + "start": 2.94, + "end": 4.72, + "confidence": 0.208 + }, + { + "text": "als", + "start": 4.72, + "end": 5.22, + "confidence": 0.885 + }, + { + "text": "Meshale", + "start": 5.22, + "end": 5.7, + "confidence": 0.385 + }, + { + "text": "für", + "start": 5.7, + "end": 5.9, + "confidence": 0.933 + }, + { + "text": "die", + "start": 5.9, + "end": 6.08, + "confidence": 0.972 + }, + { + "text": "außerordentliche", + "start": 6.08, + "end": 6.98, + "confidence": 0.899 + }, + { + "text": "Form", + "start": 6.98, + "end": 7.22, + "confidence": 0.655 + }, + { + "text": "des", + "start": 7.22, + "end": 7.58, + "confidence": 0.971 + }, + { + "text": "grullischen", + "start": 7.58, + "end": 8.04, + "confidence": 0.384 + }, + { + "text": "Rätos", + "start": 8.04, + "end": 8.58, + "confidence": 0.368 + }, + { + "text": "ist", + "start": 8.58, + "end": 9.6, + "confidence": 0.549 + }, + { + "text": "dann", + "start": 9.6, + "end": 9.74, + "confidence": 0.5 + }, + { + "text": "nicht", + "start": 9.74, + "end": 9.92, + "confidence": 0.936 + }, + { + "text": "so", + "start": 9.92, + "end": 10.08, + "confidence": 0.953 + }, + { + "text": "weitergegangen", + "start": 10.08, + "end": 10.86, + "confidence": 0.74 + }, + { + "text": "wie", + "start": 10.86, + "end": 11.04, + "confidence": 0.703 + }, + { + "text": "sich", + "start": 11.04, + "end": 11.22, + "confidence": 0.939 + }, + { + "text": "Papsbelle", + "start": 11.22, + "end": 11.62, + "confidence": 0.356 + }, + { + "text": "dick", + "start": 11.62, + "end": 11.84, + "confidence": 0.319 
+ }, + { + "text": "das", + "start": 11.84, + "end": 12.08, + "confidence": 0.844 + }, + { + "text": "gewünscht", + "start": 12.08, + "end": 12.48, + "confidence": 0.818 + }, + { + "text": "hatte.", + "start": 12.48, + "end": 12.76, + "confidence": 0.922 + } + ] + }, + { + "id": 1, + "seek": 1300, + "start": 13.98, + "end": 15.22, + "text": " Das hat er als Meshale im Rätos.", + "tokens": [ + 50364, + 2846, + 2385, + 1189, + 3907, + 376, + 14935, + 1220, + 566, + 497, + 3628, + 329, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.9505692799886067, + "compression_ratio": 0.8048780487804879, + "no_speech_prob": 0.044665463268756866, + "confidence": 0.381, + "words": [ + { + "text": "Das", + "start": 13.98, + "end": 14.2, + "confidence": 0.841 + }, + { + "text": "hat", + "start": 14.2, + "end": 14.32, + "confidence": 0.878 + }, + { + "text": "er", + "start": 14.32, + "end": 14.46, + "confidence": 0.606 + }, + { + "text": "als", + "start": 14.46, + "end": 14.7, + "confidence": 0.626 + }, + { + "text": "Meshale", + "start": 14.7, + "end": 15.0, + "confidence": 0.206 + }, + { + "text": "im", + "start": 15.0, + "end": 15.04, + "confidence": 0.101 + }, + { + "text": "Rätos.", + "start": 15.04, + "end": 15.22, + "confidence": 0.464 + } + ] + } + ], + "language": "de" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto.cpu/radio_short.mp3.words.json b/tests/expected/tiny_auto.cpu/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f2388cb2c74572247a52a55a46815f03a82a84a0 --- /dev/null +++ b/tests/expected/tiny_auto.cpu/radio_short.mp3.words.json @@ -0,0 +1,3786 @@ +{ + "text": " What are you telling me, guys? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, guys? What are you telling me, guys? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? 
What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? 
What are you telling me, dude?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.06, + "end": 5.92, + "text": " What are you telling me, guys?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.343, + "words": [ + { + "text": "What", + "start": 0.06, + "end": 4.9, + "confidence": 0.041 + }, + { + "text": "are", + "start": 4.9, + "end": 5.18, + "confidence": 0.603 + }, + { + "text": "you", + "start": 5.18, + "end": 5.42, + "confidence": 0.97 + }, + { + "text": "telling", + "start": 5.42, + "end": 5.64, + "confidence": 0.354 + }, + { + "text": "me,", + "start": 5.64, + "end": 5.88, + "confidence": 0.622 + }, + { + "text": "guys?", + "start": 5.88, + "end": 5.92, + "confidence": 0.307 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.5, + "end": 7.5, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.677, + "words": [ + { + "text": "What", + "start": 6.5, + "end": 6.92, + "confidence": 0.885 + }, + { + "text": "are", + "start": 6.92, + "end": 6.96, + "confidence": 0.936 + }, + { + "text": "you", + "start": 6.96, + "end": 7.02, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 7.02, + "end": 7.24, + "confidence": 0.904 + }, + { + "text": "me,", + "start": 7.24, + "end": 7.46, + "confidence": 0.984 + }, + { + "text": "dude?", + "start": 7.46, + "end": 7.5, + "confidence": 0.131 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 7.52, + "end": 8.28, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + 
"compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.827, + "words": [ + { + "text": "What", + "start": 7.52, + "end": 7.56, + "confidence": 0.545 + }, + { + "text": "are", + "start": 7.56, + "end": 7.6, + "confidence": 0.851 + }, + { + "text": "you", + "start": 7.6, + "end": 7.64, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 7.64, + "end": 7.68, + "confidence": 0.849 + }, + { + "text": "me,", + "start": 7.68, + "end": 8.16, + "confidence": 0.951 + }, + { + "text": "dude?", + "start": 8.16, + "end": 8.28, + "confidence": 0.857 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.46, + "end": 12.48, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.774, + "words": [ + { + "text": "What", + "start": 10.46, + "end": 10.54, + "confidence": 0.422 + }, + { + "text": "are", + "start": 10.54, + "end": 10.6, + "confidence": 0.79 + }, + { + "text": "you", + "start": 10.6, + "end": 11.38, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 11.38, + "end": 11.66, + "confidence": 0.809 + }, + { + "text": "me,", + "start": 11.66, + "end": 12.44, + "confidence": 0.956 + }, + { + "text": "dude?", + "start": 12.44, + "end": 12.48, + "confidence": 0.837 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 12.48, + "end": 14.46, + "text": " What are you telling me, guys?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.762, + "words": [ + { + "text": "What", + "start": 12.48, + "end": 12.52, + "confidence": 0.504 + }, + { + "text": "are", + "start": 12.52, + "end": 12.56, + "confidence": 0.845 + }, + { 
+ "text": "you", + "start": 12.56, + "end": 12.6, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 12.6, + "end": 12.64, + "confidence": 0.844 + }, + { + "text": "me,", + "start": 12.64, + "end": 13.18, + "confidence": 0.962 + }, + { + "text": "guys?", + "start": 13.18, + "end": 14.46, + "confidence": 0.57 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.5, + "end": 21.3, + "text": " What are you telling me, guys?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.936, + "words": [ + { + "text": "What", + "start": 16.5, + "end": 20.42, + "confidence": 0.887 + }, + { + "text": "are", + "start": 20.42, + "end": 20.54, + "confidence": 0.936 + }, + { + "text": "you", + "start": 20.54, + "end": 20.7, + "confidence": 0.996 + }, + { + "text": "telling", + "start": 20.7, + "end": 20.94, + "confidence": 0.934 + }, + { + "text": "me,", + "start": 20.94, + "end": 21.26, + "confidence": 0.897 + }, + { + "text": "guys?", + "start": 21.26, + "end": 21.3, + "confidence": 0.969 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 21.5, + "end": 22.78, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.953, + "words": [ + { + "text": "What", + "start": 21.5, + "end": 22.2, + "confidence": 0.951 + }, + { + "text": "are", + "start": 22.2, + "end": 22.24, + "confidence": 0.966 + }, + { + "text": "you", + "start": 22.24, + "end": 22.36, + "confidence": 0.997 + }, + { + "text": "telling", + "start": 22.36, + "end": 22.5, + "confidence": 0.862 + }, + { + "text": "me,", + "start": 22.5, + "end": 22.7, + "confidence": 0.984 + }, + { + "text": "dude?", + "start": 
22.7, + "end": 22.78, + "confidence": 0.967 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 22.78, + "end": 23.02, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.856, + "words": [ + { + "text": "What", + "start": 22.78, + "end": 22.82, + "confidence": 0.54 + }, + { + "text": "are", + "start": 22.82, + "end": 22.86, + "confidence": 0.888 + }, + { + "text": "you", + "start": 22.86, + "end": 22.9, + "confidence": 0.996 + }, + { + "text": "telling", + "start": 22.9, + "end": 22.94, + "confidence": 0.882 + }, + { + "text": "me,", + "start": 22.94, + "end": 22.98, + "confidence": 0.975 + }, + { + "text": "dude?", + "start": 22.98, + "end": 23.02, + "confidence": 0.955 + } + ] + }, + { + "id": 8, + "seek": 2600, + "start": 26.02, + "end": 28.74, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.523, + "words": [ + { + "text": "What", + "start": 26.02, + "end": 26.06, + "confidence": 0.145 + }, + { + "text": "are", + "start": 26.06, + "end": 27.16, + "confidence": 0.442 + }, + { + "text": "you", + "start": 27.16, + "end": 28.24, + "confidence": 0.944 + }, + { + "text": "telling", + "start": 28.24, + "end": 28.28, + "confidence": 0.536 + }, + { + "text": "me,", + "start": 28.28, + "end": 28.7, + "confidence": 0.926 + }, + { + "text": "dude?", + "start": 28.7, + "end": 28.74, + "confidence": 0.683 + } + ] + }, + { + "id": 9, + "seek": 2600, + "start": 30.5, + "end": 31.8, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + 
"avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.586, + "words": [ + { + "text": "What", + "start": 30.5, + "end": 31.1, + "confidence": 0.193 + }, + { + "text": "are", + "start": 31.1, + "end": 31.14, + "confidence": 0.532 + }, + { + "text": "you", + "start": 31.14, + "end": 31.46, + "confidence": 0.966 + }, + { + "text": "telling", + "start": 31.46, + "end": 31.5, + "confidence": 0.588 + }, + { + "text": "me,", + "start": 31.5, + "end": 31.68, + "confidence": 0.895 + }, + { + "text": "dude?", + "start": 31.68, + "end": 31.8, + "confidence": 0.776 + } + ] + }, + { + "id": 10, + "seek": 2600, + "start": 31.8, + "end": 32.58, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.664, + "words": [ + { + "text": "What", + "start": 31.8, + "end": 32.26, + "confidence": 0.321 + }, + { + "text": "are", + "start": 32.26, + "end": 32.3, + "confidence": 0.594 + }, + { + "text": "you", + "start": 32.3, + "end": 32.4, + "confidence": 0.972 + }, + { + "text": "telling", + "start": 32.4, + "end": 32.44, + "confidence": 0.609 + }, + { + "text": "me,", + "start": 32.44, + "end": 32.48, + "confidence": 0.891 + }, + { + "text": "dude?", + "start": 32.48, + "end": 32.58, + "confidence": 0.85 + } + ] + }, + { + "id": 11, + "seek": 2600, + "start": 33.52, + "end": 35.4, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.711, + "words": [ + { + "text": "What", + "start": 33.52, + "end": 33.56, + "confidence": 0.44 + }, + { + "text": "are", + 
"start": 33.56, + "end": 33.66, + "confidence": 0.602 + }, + { + "text": "you", + "start": 33.66, + "end": 33.74, + "confidence": 0.972 + }, + { + "text": "telling", + "start": 33.74, + "end": 34.84, + "confidence": 0.635 + }, + { + "text": "me,", + "start": 34.84, + "end": 35.36, + "confidence": 0.901 + }, + { + "text": "dude?", + "start": 35.36, + "end": 35.4, + "confidence": 0.88 + } + ] + }, + { + "id": 12, + "seek": 2600, + "start": 36.24, + "end": 38.1, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.745, + "words": [ + { + "text": "What", + "start": 36.24, + "end": 36.34, + "confidence": 0.474 + }, + { + "text": "are", + "start": 36.34, + "end": 36.38, + "confidence": 0.621 + }, + { + "text": "you", + "start": 36.38, + "end": 37.34, + "confidence": 0.973 + }, + { + "text": "telling", + "start": 37.34, + "end": 37.42, + "confidence": 0.715 + }, + { + "text": "me,", + "start": 37.42, + "end": 38.06, + "confidence": 0.929 + }, + { + "text": "dude?", + "start": 38.06, + "end": 38.1, + "confidence": 0.9 + } + ] + }, + { + "id": 13, + "seek": 2600, + "start": 38.1, + "end": 38.62, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.787, + "words": [ + { + "text": "What", + "start": 38.1, + "end": 38.14, + "confidence": 0.555 + }, + { + "text": "are", + "start": 38.14, + "end": 38.18, + "confidence": 0.674 + }, + { + "text": "you", + "start": 38.18, + "end": 38.22, + "confidence": 0.978 + }, + { + "text": "telling", + "start": 38.22, + "end": 38.26, + "confidence": 0.751 + }, + { + "text": "me,", + "start": 
38.26, + "end": 38.3, + "confidence": 0.936 + }, + { + "text": "dude?", + "start": 38.3, + "end": 38.62, + "confidence": 0.921 + } + ] + }, + { + "id": 14, + "seek": 2600, + "start": 39.52, + "end": 41.92, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.813, + "words": [ + { + "text": "What", + "start": 39.52, + "end": 40.78, + "confidence": 0.577 + }, + { + "text": "are", + "start": 40.78, + "end": 41.06, + "confidence": 0.737 + }, + { + "text": "you", + "start": 41.06, + "end": 41.14, + "confidence": 0.982 + }, + { + "text": "telling", + "start": 41.14, + "end": 41.18, + "confidence": 0.785 + }, + { + "text": "me,", + "start": 41.18, + "end": 41.86, + "confidence": 0.941 + }, + { + "text": "dude?", + "start": 41.86, + "end": 41.92, + "confidence": 0.936 + } + ] + }, + { + "id": 15, + "seek": 2600, + "start": 41.92, + "end": 42.86, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.834, + "words": [ + { + "text": "What", + "start": 41.92, + "end": 41.96, + "confidence": 0.644 + }, + { + "text": "are", + "start": 41.96, + "end": 42.0, + "confidence": 0.752 + }, + { + "text": "you", + "start": 42.0, + "end": 42.08, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 42.08, + "end": 42.3, + "confidence": 0.788 + }, + { + "text": "me,", + "start": 42.3, + "end": 42.82, + "confidence": 0.953 + }, + { + "text": "dude?", + "start": 42.82, + "end": 42.86, + "confidence": 0.938 + } + ] + }, + { + "id": 16, + "seek": 2600, + "start": 44.5, + "end": 46.46, + "text": " What are you telling me, dude?", + 
"tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.795, + "words": [ + { + "text": "What", + "start": 44.5, + "end": 45.0, + "confidence": 0.502 + }, + { + "text": "are", + "start": 45.0, + "end": 45.64, + "confidence": 0.732 + }, + { + "text": "you", + "start": 45.64, + "end": 45.72, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 45.72, + "end": 45.92, + "confidence": 0.789 + }, + { + "text": "me,", + "start": 45.92, + "end": 46.38, + "confidence": 0.952 + }, + { + "text": "dude?", + "start": 46.38, + "end": 46.46, + "confidence": 0.932 + } + ] + }, + { + "id": 17, + "seek": 2600, + "start": 46.46, + "end": 46.94, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.789, + "words": [ + { + "text": "What", + "start": 46.46, + "end": 46.5, + "confidence": 0.481 + }, + { + "text": "are", + "start": 46.5, + "end": 46.54, + "confidence": 0.742 + }, + { + "text": "you", + "start": 46.54, + "end": 46.58, + "confidence": 0.987 + }, + { + "text": "telling", + "start": 46.58, + "end": 46.62, + "confidence": 0.779 + }, + { + "text": "me,", + "start": 46.62, + "end": 46.66, + "confidence": 0.948 + }, + { + "text": "dude?", + "start": 46.66, + "end": 46.94, + "confidence": 0.932 + } + ] + }, + { + "id": 18, + "seek": 2600, + "start": 47.52, + "end": 48.46, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.792, + "words": [ + 
{ + "text": "What", + "start": 47.52, + "end": 47.56, + "confidence": 0.479 + }, + { + "text": "are", + "start": 47.56, + "end": 48.04, + "confidence": 0.746 + }, + { + "text": "you", + "start": 48.04, + "end": 48.1, + "confidence": 0.987 + }, + { + "text": "telling", + "start": 48.1, + "end": 48.28, + "confidence": 0.785 + }, + { + "text": "me,", + "start": 48.28, + "end": 48.42, + "confidence": 0.95 + }, + { + "text": "dude?", + "start": 48.42, + "end": 48.46, + "confidence": 0.936 + } + ] + }, + { + "id": 19, + "seek": 2600, + "start": 49.52, + "end": 51.38, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.838, + "words": [ + { + "text": "What", + "start": 49.52, + "end": 50.24, + "confidence": 0.651 + }, + { + "text": "are", + "start": 50.24, + "end": 50.48, + "confidence": 0.757 + }, + { + "text": "you", + "start": 50.48, + "end": 50.56, + "confidence": 0.989 + }, + { + "text": "telling", + "start": 50.56, + "end": 50.6, + "confidence": 0.793 + }, + { + "text": "me,", + "start": 50.6, + "end": 51.32, + "confidence": 0.95 + }, + { + "text": "dude?", + "start": 51.32, + "end": 51.38, + "confidence": 0.943 + } + ] + }, + { + "id": 20, + "seek": 2600, + "start": 51.98, + "end": 54.06, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.78, + "words": [ + { + "text": "What", + "start": 51.98, + "end": 52.14, + "confidence": 0.43 + }, + { + "text": "are", + "start": 52.14, + "end": 52.62, + "confidence": 0.737 + }, + { + "text": "you", + "start": 52.62, + "end": 52.72, + "confidence": 0.988 + }, + { + "text": 
"telling", + "start": 52.72, + "end": 52.88, + "confidence": 0.81 + }, + { + "text": "me,", + "start": 52.88, + "end": 54.02, + "confidence": 0.943 + }, + { + "text": "dude?", + "start": 54.02, + "end": 54.06, + "confidence": 0.944 + } + ] + }, + { + "id": 21, + "seek": 5400, + "start": 54.06, + "end": 55.0, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.7, + "words": [ + { + "text": "What", + "start": 54.06, + "end": 54.1, + "confidence": 0.506 + }, + { + "text": "are", + "start": 54.1, + "end": 54.2, + "confidence": 0.588 + }, + { + "text": "you", + "start": 54.2, + "end": 54.46, + "confidence": 0.876 + }, + { + "text": "telling", + "start": 54.46, + "end": 54.5, + "confidence": 0.747 + }, + { + "text": "me,", + "start": 54.5, + "end": 54.96, + "confidence": 0.71 + }, + { + "text": "dude?", + "start": 54.96, + "end": 55.0, + "confidence": 0.848 + } + ] + }, + { + "id": 22, + "seek": 5400, + "start": 56.5, + "end": 58.0, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.739, + "words": [ + { + "text": "What", + "start": 56.5, + "end": 56.82, + "confidence": 0.512 + }, + { + "text": "are", + "start": 56.82, + "end": 56.92, + "confidence": 0.568 + }, + { + "text": "you", + "start": 56.92, + "end": 57.12, + "confidence": 0.964 + }, + { + "text": "telling", + "start": 57.12, + "end": 57.16, + "confidence": 0.808 + }, + { + "text": "me,", + "start": 57.16, + "end": 57.96, + "confidence": 0.803 + }, + { + "text": "dude?", + "start": 57.96, + "end": 58.0, + "confidence": 0.897 + } + ] + }, + { + "id": 23, + "seek": 
5400, + "start": 58.0, + "end": 58.9, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.741, + "words": [ + { + "text": "What", + "start": 58.0, + "end": 58.64, + "confidence": 0.48 + }, + { + "text": "are", + "start": 58.64, + "end": 58.68, + "confidence": 0.584 + }, + { + "text": "you", + "start": 58.68, + "end": 58.78, + "confidence": 0.954 + }, + { + "text": "telling", + "start": 58.78, + "end": 58.82, + "confidence": 0.816 + }, + { + "text": "me,", + "start": 58.82, + "end": 58.86, + "confidence": 0.837 + }, + { + "text": "dude?", + "start": 58.86, + "end": 58.9, + "confidence": 0.908 + } + ] + }, + { + "id": 24, + "seek": 5400, + "start": 59.52, + "end": 60.5, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.783, + "words": [ + { + "text": "What", + "start": 59.52, + "end": 60.02, + "confidence": 0.58 + }, + { + "text": "are", + "start": 60.02, + "end": 60.18, + "confidence": 0.641 + }, + { + "text": "you", + "start": 60.18, + "end": 60.22, + "confidence": 0.967 + }, + { + "text": "telling", + "start": 60.22, + "end": 60.26, + "confidence": 0.826 + }, + { + "text": "me,", + "start": 60.26, + "end": 60.38, + "confidence": 0.853 + }, + { + "text": "dude?", + "start": 60.38, + "end": 60.5, + "confidence": 0.911 + } + ] + }, + { + "id": 25, + "seek": 5400, + "start": 61.52, + "end": 62.82, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + 
"no_speech_prob": 0.0238738264888525, + "confidence": 0.803, + "words": [ + { + "text": "What", + "start": 61.52, + "end": 62.34, + "confidence": 0.613 + }, + { + "text": "are", + "start": 62.34, + "end": 62.38, + "confidence": 0.66 + }, + { + "text": "you", + "start": 62.38, + "end": 62.42, + "confidence": 0.967 + }, + { + "text": "telling", + "start": 62.42, + "end": 62.74, + "confidence": 0.835 + }, + { + "text": "me,", + "start": 62.74, + "end": 62.78, + "confidence": 0.892 + }, + { + "text": "dude?", + "start": 62.78, + "end": 62.82, + "confidence": 0.918 + } + ] + }, + { + "id": 26, + "seek": 5400, + "start": 63.52, + "end": 65.59, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.835, + "words": [ + { + "text": "What", + "start": 63.52, + "end": 64.12, + "confidence": 0.731 + }, + { + "text": "are", + "start": 64.12, + "end": 64.26, + "confidence": 0.666 + }, + { + "text": "you", + "start": 64.26, + "end": 64.3, + "confidence": 0.971 + }, + { + "text": "telling", + "start": 64.3, + "end": 64.6, + "confidence": 0.864 + }, + { + "text": "me,", + "start": 64.6, + "end": 65.02, + "confidence": 0.902 + }, + { + "text": "dude?", + "start": 65.02, + "end": 65.59, + "confidence": 0.92 + } + ] + }, + { + "id": 27, + "seek": 5400, + "start": 65.59, + "end": 66.94, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.825, + "words": [ + { + "text": "What", + "start": 65.59, + "end": 65.84, + "confidence": 0.635 + }, + { + "text": "are", + "start": 65.84, + "end": 65.88, + "confidence": 0.691 + }, + { + "text": "you", + 
"start": 65.88, + "end": 65.92, + "confidence": 0.975 + }, + { + "text": "telling", + "start": 65.92, + "end": 65.96, + "confidence": 0.866 + }, + { + "text": "me,", + "start": 65.96, + "end": 66.86, + "confidence": 0.918 + }, + { + "text": "dude?", + "start": 66.86, + "end": 66.94, + "confidence": 0.927 + } + ] + }, + { + "id": 28, + "seek": 5400, + "start": 67.52, + "end": 69.18, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.821, + "words": [ + { + "text": "What", + "start": 67.52, + "end": 67.76, + "confidence": 0.572 + }, + { + "text": "are", + "start": 67.76, + "end": 67.9, + "confidence": 0.721 + }, + { + "text": "you", + "start": 67.9, + "end": 67.98, + "confidence": 0.979 + }, + { + "text": "telling", + "start": 67.98, + "end": 68.06, + "confidence": 0.88 + }, + { + "text": "me,", + "start": 68.06, + "end": 68.74, + "confidence": 0.923 + }, + { + "text": "dude?", + "start": 68.74, + "end": 69.18, + "confidence": 0.931 + } + ] + }, + { + "id": 29, + "seek": 5400, + "start": 69.86, + "end": 72.14, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.831, + "words": [ + { + "text": "What", + "start": 69.86, + "end": 69.9, + "confidence": 0.61 + }, + { + "text": "are", + "start": 69.9, + "end": 70.14, + "confidence": 0.724 + }, + { + "text": "you", + "start": 70.14, + "end": 70.54, + "confidence": 0.98 + }, + { + "text": "telling", + "start": 70.54, + "end": 71.0, + "confidence": 0.876 + }, + { + "text": "me,", + "start": 71.0, + "end": 71.9, + "confidence": 0.928 + }, + { + "text": "dude?", + "start": 71.9, 
+ "end": 72.14, + "confidence": 0.932 + } + ] + }, + { + "id": 30, + "seek": 5400, + "start": 72.14, + "end": 72.76, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.813, + "words": [ + { + "text": "What", + "start": 72.14, + "end": 72.4, + "confidence": 0.516 + }, + { + "text": "are", + "start": 72.4, + "end": 72.44, + "confidence": 0.736 + }, + { + "text": "you", + "start": 72.44, + "end": 72.52, + "confidence": 0.982 + }, + { + "text": "telling", + "start": 72.52, + "end": 72.6, + "confidence": 0.88 + }, + { + "text": "me,", + "start": 72.6, + "end": 72.72, + "confidence": 0.943 + }, + { + "text": "dude?", + "start": 72.72, + "end": 72.76, + "confidence": 0.937 + } + ] + }, + { + "id": 31, + "seek": 5400, + "start": 74.08, + "end": 76.34, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.85, + "words": [ + { + "text": "What", + "start": 74.08, + "end": 74.32, + "confidence": 0.651 + }, + { + "text": "are", + "start": 74.32, + "end": 74.36, + "confidence": 0.755 + }, + { + "text": "you", + "start": 74.36, + "end": 74.94, + "confidence": 0.984 + }, + { + "text": "telling", + "start": 74.94, + "end": 75.56, + "confidence": 0.88 + }, + { + "text": "me,", + "start": 75.56, + "end": 76.3, + "confidence": 0.943 + }, + { + "text": "dude?", + "start": 76.3, + "end": 76.34, + "confidence": 0.937 + } + ] + }, + { + "id": 32, + "seek": 5400, + "start": 76.34, + "end": 77.98, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + 
"avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.839, + "words": [ + { + "text": "What", + "start": 76.34, + "end": 77.38, + "confidence": 0.607 + }, + { + "text": "are", + "start": 77.38, + "end": 77.42, + "confidence": 0.759 + }, + { + "text": "you", + "start": 77.42, + "end": 77.46, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 77.46, + "end": 77.54, + "confidence": 0.879 + }, + { + "text": "me,", + "start": 77.54, + "end": 77.94, + "confidence": 0.938 + }, + { + "text": "dude?", + "start": 77.94, + "end": 77.98, + "confidence": 0.936 + } + ] + }, + { + "id": 33, + "seek": 5400, + "start": 77.98, + "end": 79.08, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.85, + "words": [ + { + "text": "What", + "start": 77.98, + "end": 78.02, + "confidence": 0.636 + }, + { + "text": "are", + "start": 78.02, + "end": 78.06, + "confidence": 0.75 + }, + { + "text": "you", + "start": 78.06, + "end": 78.92, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 78.92, + "end": 78.96, + "confidence": 0.894 + }, + { + "text": "me,", + "start": 78.96, + "end": 79.04, + "confidence": 0.954 + }, + { + "text": "dude?", + "start": 79.04, + "end": 79.08, + "confidence": 0.939 + } + ] + }, + { + "id": 34, + "seek": 5400, + "start": 80.22, + "end": 81.66, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.813, + "words": [ + { + "text": "What", + "start": 80.22, + "end": 80.26, + "confidence": 0.484 + }, + { + "text": "are", 
+ "start": 80.26, + "end": 80.6, + "confidence": 0.745 + }, + { + "text": "you", + "start": 80.6, + "end": 80.68, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 80.68, + "end": 80.96, + "confidence": 0.899 + }, + { + "text": "me,", + "start": 80.96, + "end": 81.62, + "confidence": 0.96 + }, + { + "text": "dude?", + "start": 81.62, + "end": 81.66, + "confidence": 0.942 + } + ] + }, + { + "id": 35, + "seek": 8200, + "start": 82.02, + "end": 83.08, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.772, + "words": [ + { + "text": "What", + "start": 82.02, + "end": 82.06, + "confidence": 0.701 + }, + { + "text": "are", + "start": 82.06, + "end": 82.28, + "confidence": 0.488 + }, + { + "text": "you", + "start": 82.28, + "end": 82.38, + "confidence": 0.934 + }, + { + "text": "telling", + "start": 82.38, + "end": 82.42, + "confidence": 0.755 + }, + { + "text": "me,", + "start": 82.42, + "end": 82.86, + "confidence": 0.92 + }, + { + "text": "dude?", + "start": 82.86, + "end": 83.08, + "confidence": 0.956 + } + ] + }, + { + "id": 36, + "seek": 8200, + "start": 84.34, + "end": 85.3, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.786, + "words": [ + { + "text": "What", + "start": 84.34, + "end": 84.38, + "confidence": 0.529 + }, + { + "text": "are", + "start": 84.38, + "end": 84.64, + "confidence": 0.637 + }, + { + "text": "you", + "start": 84.64, + "end": 85.12, + "confidence": 0.969 + }, + { + "text": "telling", + "start": 85.12, + "end": 85.16, + "confidence": 0.815 + }, + { + "text": "me,", + 
"start": 85.16, + "end": 85.26, + "confidence": 0.932 + }, + { + "text": "dude?", + "start": 85.26, + "end": 85.3, + "confidence": 0.952 + } + ] + }, + { + "id": 37, + "seek": 8200, + "start": 85.52, + "end": 87.94, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.836, + "words": [ + { + "text": "What", + "start": 85.52, + "end": 86.82, + "confidence": 0.676 + }, + { + "text": "are", + "start": 86.82, + "end": 86.86, + "confidence": 0.703 + }, + { + "text": "you", + "start": 86.86, + "end": 86.98, + "confidence": 0.97 + }, + { + "text": "telling", + "start": 86.98, + "end": 87.02, + "confidence": 0.826 + }, + { + "text": "me,", + "start": 87.02, + "end": 87.9, + "confidence": 0.942 + }, + { + "text": "dude?", + "start": 87.9, + "end": 87.94, + "confidence": 0.955 + } + ] + }, + { + "id": 38, + "seek": 8200, + "start": 87.94, + "end": 89.56, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.849, + "words": [ + { + "text": "What", + "start": 87.94, + "end": 88.34, + "confidence": 0.7 + }, + { + "text": "are", + "start": 88.34, + "end": 89.16, + "confidence": 0.717 + }, + { + "text": "you", + "start": 89.16, + "end": 89.44, + "confidence": 0.976 + }, + { + "text": "telling", + "start": 89.44, + "end": 89.48, + "confidence": 0.84 + }, + { + "text": "me,", + "start": 89.48, + "end": 89.52, + "confidence": 0.954 + }, + { + "text": "dude?", + "start": 89.52, + "end": 89.56, + "confidence": 0.954 + } + ] + }, + { + "id": 39, + "seek": 8200, + "start": 89.94, + "end": 90.9, + "text": " What are you telling me, dude?", 
+ "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.859, + "words": [ + { + "text": "What", + "start": 89.94, + "end": 90.1, + "confidence": 0.747 + }, + { + "text": "are", + "start": 90.1, + "end": 90.14, + "confidence": 0.718 + }, + { + "text": "you", + "start": 90.14, + "end": 90.28, + "confidence": 0.976 + }, + { + "text": "telling", + "start": 90.28, + "end": 90.5, + "confidence": 0.847 + }, + { + "text": "me,", + "start": 90.5, + "end": 90.86, + "confidence": 0.944 + }, + { + "text": "dude?", + "start": 90.86, + "end": 90.9, + "confidence": 0.959 + } + ] + }, + { + "id": 40, + "seek": 8200, + "start": 91.52, + "end": 92.2, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.851, + "words": [ + { + "text": "What", + "start": 91.52, + "end": 91.6, + "confidence": 0.723 + }, + { + "text": "are", + "start": 91.6, + "end": 91.64, + "confidence": 0.718 + }, + { + "text": "you", + "start": 91.64, + "end": 91.68, + "confidence": 0.978 + }, + { + "text": "telling", + "start": 91.68, + "end": 91.72, + "confidence": 0.83 + }, + { + "text": "me,", + "start": 91.72, + "end": 92.16, + "confidence": 0.944 + }, + { + "text": "dude?", + "start": 92.16, + "end": 92.2, + "confidence": 0.958 + } + ] + }, + { + "id": 41, + "seek": 8200, + "start": 93.52, + "end": 94.66, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.862, + "words": [ + { + 
"text": "What", + "start": 93.52, + "end": 94.04, + "confidence": 0.743 + }, + { + "text": "are", + "start": 94.04, + "end": 94.08, + "confidence": 0.73 + }, + { + "text": "you", + "start": 94.08, + "end": 94.24, + "confidence": 0.981 + }, + { + "text": "telling", + "start": 94.24, + "end": 94.28, + "confidence": 0.848 + }, + { + "text": "me,", + "start": 94.28, + "end": 94.36, + "confidence": 0.949 + }, + { + "text": "dude?", + "start": 94.36, + "end": 94.66, + "confidence": 0.962 + } + ] + }, + { + "id": 42, + "seek": 8200, + "start": 95.52, + "end": 98.44, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.867, + "words": [ + { + "text": "What", + "start": 95.52, + "end": 97.08, + "confidence": 0.72 + }, + { + "text": "are", + "start": 97.08, + "end": 97.16, + "confidence": 0.762 + }, + { + "text": "you", + "start": 97.16, + "end": 97.28, + "confidence": 0.984 + }, + { + "text": "telling", + "start": 97.28, + "end": 97.72, + "confidence": 0.862 + }, + { + "text": "me,", + "start": 97.72, + "end": 98.4, + "confidence": 0.951 + }, + { + "text": "dude?", + "start": 98.4, + "end": 98.44, + "confidence": 0.962 + } + ] + }, + { + "id": 43, + "seek": 8200, + "start": 98.44, + "end": 98.9, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.885, + "words": [ + { + "text": "What", + "start": 98.44, + "end": 98.48, + "confidence": 0.77 + }, + { + "text": "are", + "start": 98.48, + "end": 98.52, + "confidence": 0.781 + }, + { + "text": "you", + "start": 98.52, + "end": 98.78, + "confidence": 0.986 + }, + { + "text": 
"telling", + "start": 98.78, + "end": 98.82, + "confidence": 0.867 + }, + { + "text": "me,", + "start": 98.82, + "end": 98.86, + "confidence": 0.965 + }, + { + "text": "dude?", + "start": 98.86, + "end": 98.9, + "confidence": 0.965 + } + ] + }, + { + "id": 44, + "seek": 8200, + "start": 99.52, + "end": 101.8, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.887, + "words": [ + { + "text": "What", + "start": 99.52, + "end": 101.06, + "confidence": 0.741 + }, + { + "text": "are", + "start": 101.06, + "end": 101.1, + "confidence": 0.806 + }, + { + "text": "you", + "start": 101.1, + "end": 101.14, + "confidence": 0.989 + }, + { + "text": "telling", + "start": 101.14, + "end": 101.44, + "confidence": 0.88 + }, + { + "text": "me,", + "start": 101.44, + "end": 101.76, + "confidence": 0.972 + }, + { + "text": "dude?", + "start": 101.76, + "end": 101.8, + "confidence": 0.966 + } + ] + }, + { + "id": 45, + "seek": 8200, + "start": 101.8, + "end": 103.02, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.881, + "words": [ + { + "text": "What", + "start": 101.8, + "end": 102.82, + "confidence": 0.731 + }, + { + "text": "are", + "start": 102.82, + "end": 102.86, + "confidence": 0.796 + }, + { + "text": "you", + "start": 102.86, + "end": 102.9, + "confidence": 0.988 + }, + { + "text": "telling", + "start": 102.9, + "end": 102.94, + "confidence": 0.868 + }, + { + "text": "me,", + "start": 102.94, + "end": 102.98, + "confidence": 0.971 + }, + { + "text": "dude?", + "start": 102.98, + "end": 103.02, + "confidence": 0.966 + } 
+ ] + }, + { + "id": 46, + "seek": 8200, + "start": 104.5, + "end": 105.34, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.888, + "words": [ + { + "text": "What", + "start": 104.5, + "end": 104.82, + "confidence": 0.737 + }, + { + "text": "are", + "start": 104.82, + "end": 105.16, + "confidence": 0.806 + }, + { + "text": "you", + "start": 105.16, + "end": 105.2, + "confidence": 0.99 + }, + { + "text": "telling", + "start": 105.2, + "end": 105.24, + "confidence": 0.885 + }, + { + "text": "me,", + "start": 105.24, + "end": 105.3, + "confidence": 0.974 + }, + { + "text": "dude?", + "start": 105.3, + "end": 105.34, + "confidence": 0.967 + } + ] + }, + { + "id": 47, + "seek": 8200, + "start": 106.5, + "end": 108.02, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.874, + "words": [ + { + "text": "What", + "start": 106.5, + "end": 106.92, + "confidence": 0.697 + }, + { + "text": "are", + "start": 106.92, + "end": 107.1, + "confidence": 0.786 + }, + { + "text": "you", + "start": 107.1, + "end": 107.14, + "confidence": 0.99 + }, + { + "text": "telling", + "start": 107.14, + "end": 107.18, + "confidence": 0.876 + }, + { + "text": "me,", + "start": 107.18, + "end": 107.76, + "confidence": 0.971 + }, + { + "text": "dude?", + "start": 107.76, + "end": 108.02, + "confidence": 0.968 + } + ] + }, + { + "id": 48, + "seek": 8200, + "start": 108.02, + "end": 109.54, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": 
-0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.82, + "words": [ + { + "text": "What", + "start": 108.02, + "end": 108.06, + "confidence": 0.48 + }, + { + "text": "are", + "start": 108.06, + "end": 108.2, + "confidence": 0.773 + }, + { + "text": "you", + "start": 108.2, + "end": 108.24, + "confidence": 0.99 + }, + { + "text": "telling", + "start": 108.24, + "end": 108.34, + "confidence": 0.878 + }, + { + "text": "me,", + "start": 108.34, + "end": 109.48, + "confidence": 0.97 + }, + { + "text": "dude?", + "start": 109.48, + "end": 109.54, + "confidence": 0.97 + } + ] + }, + { + "id": 49, + "seek": 11000, + "start": 110.02, + "end": 110.56, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.719, + "words": [ + { + "text": "What", + "start": 110.02, + "end": 110.16, + "confidence": 0.419 + }, + { + "text": "are", + "start": 110.16, + "end": 110.2, + "confidence": 0.523 + }, + { + "text": "you", + "start": 110.2, + "end": 110.24, + "confidence": 0.913 + }, + { + "text": "telling", + "start": 110.24, + "end": 110.48, + "confidence": 0.79 + }, + { + "text": "me,", + "start": 110.48, + "end": 110.52, + "confidence": 0.904 + }, + { + "text": "dude?", + "start": 110.52, + "end": 110.56, + "confidence": 0.964 + } + ] + }, + { + "id": 50, + "seek": 11000, + "start": 112.14, + "end": 113.52, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.835, + "words": [ + { + "text": "What", + "start": 112.14, + "end": 112.3, + "confidence": 0.644 + }, + { + 
"text": "are", + "start": 112.3, + "end": 113.22, + "confidence": 0.734 + }, + { + "text": "you", + "start": 113.22, + "end": 113.4, + "confidence": 0.978 + }, + { + "text": "telling", + "start": 113.4, + "end": 113.44, + "confidence": 0.821 + }, + { + "text": "me,", + "start": 113.44, + "end": 113.48, + "confidence": 0.933 + }, + { + "text": "dude?", + "start": 113.48, + "end": 113.52, + "confidence": 0.962 + } + ] + }, + { + "id": 51, + "seek": 11000, + "start": 113.9, + "end": 114.9, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.832, + "words": [ + { + "text": "What", + "start": 113.9, + "end": 114.06, + "confidence": 0.615 + }, + { + "text": "are", + "start": 114.06, + "end": 114.16, + "confidence": 0.733 + }, + { + "text": "you", + "start": 114.16, + "end": 114.36, + "confidence": 0.978 + }, + { + "text": "telling", + "start": 114.36, + "end": 114.4, + "confidence": 0.832 + }, + { + "text": "me,", + "start": 114.4, + "end": 114.86, + "confidence": 0.934 + }, + { + "text": "dude?", + "start": 114.86, + "end": 114.9, + "confidence": 0.967 + } + ] + }, + { + "id": 52, + "seek": 11000, + "start": 115.52, + "end": 117.54, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.86, + "words": [ + { + "text": "What", + "start": 115.52, + "end": 116.54, + "confidence": 0.67 + }, + { + "text": "are", + "start": 116.54, + "end": 116.58, + "confidence": 0.788 + }, + { + "text": "you", + "start": 116.58, + "end": 116.64, + "confidence": 0.984 + }, + { + "text": "telling", + "start": 116.64, + "end": 116.68, + 
"confidence": 0.852 + }, + { + "text": "me,", + "start": 116.68, + "end": 117.44, + "confidence": 0.947 + }, + { + "text": "dude?", + "start": 117.44, + "end": 117.54, + "confidence": 0.966 + } + ] + }, + { + "id": 53, + "seek": 11000, + "start": 118.18, + "end": 120.14, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.88, + "words": [ + { + "text": "What", + "start": 118.18, + "end": 118.36, + "confidence": 0.758 + }, + { + "text": "are", + "start": 118.36, + "end": 118.4, + "confidence": 0.789 + }, + { + "text": "you", + "start": 118.4, + "end": 118.74, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 118.74, + "end": 118.82, + "confidence": 0.858 + }, + { + "text": "me,", + "start": 118.82, + "end": 120.1, + "confidence": 0.948 + }, + { + "text": "dude?", + "start": 120.1, + "end": 120.14, + "confidence": 0.968 + } + ] + }, + { + "id": 54, + "seek": 11000, + "start": 120.14, + "end": 120.38, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.884, + "words": [ + { + "text": "What", + "start": 120.14, + "end": 120.18, + "confidence": 0.757 + }, + { + "text": "are", + "start": 120.18, + "end": 120.22, + "confidence": 0.808 + }, + { + "text": "you", + "start": 120.22, + "end": 120.26, + "confidence": 0.986 + }, + { + "text": "telling", + "start": 120.26, + "end": 120.3, + "confidence": 0.857 + }, + { + "text": "me,", + "start": 120.3, + "end": 120.34, + "confidence": 0.954 + }, + { + "text": "dude?", + "start": 120.34, + "end": 120.38, + "confidence": 0.966 + } + ] + }, + { + "id": 55, + "seek": 
11000, + "start": 121.52, + "end": 123.36, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.874, + "words": [ + { + "text": "What", + "start": 121.52, + "end": 122.72, + "confidence": 0.718 + }, + { + "text": "are", + "start": 122.72, + "end": 123.2, + "confidence": 0.788 + }, + { + "text": "you", + "start": 123.2, + "end": 123.24, + "confidence": 0.986 + }, + { + "text": "telling", + "start": 123.24, + "end": 123.28, + "confidence": 0.865 + }, + { + "text": "me,", + "start": 123.28, + "end": 123.32, + "confidence": 0.952 + }, + { + "text": "dude?", + "start": 123.32, + "end": 123.36, + "confidence": 0.968 + } + ] + }, + { + "id": 56, + "seek": 11000, + "start": 123.52, + "end": 125.14, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.882, + "words": [ + { + "text": "What", + "start": 123.52, + "end": 124.56, + "confidence": 0.718 + }, + { + "text": "are", + "start": 124.56, + "end": 124.74, + "confidence": 0.821 + }, + { + "text": "you", + "start": 124.74, + "end": 125.02, + "confidence": 0.989 + }, + { + "text": "telling", + "start": 125.02, + "end": 125.06, + "confidence": 0.878 + }, + { + "text": "me,", + "start": 125.06, + "end": 125.1, + "confidence": 0.952 + }, + { + "text": "dude?", + "start": 125.1, + "end": 125.14, + "confidence": 0.968 + } + ] + }, + { + "id": 57, + "seek": 11000, + "start": 125.52, + "end": 127.58, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + 
"compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.894, + "words": [ + { + "text": "What", + "start": 125.52, + "end": 126.58, + "confidence": 0.747 + }, + { + "text": "are", + "start": 126.58, + "end": 126.9, + "confidence": 0.837 + }, + { + "text": "you", + "start": 126.9, + "end": 127.06, + "confidence": 0.991 + }, + { + "text": "telling", + "start": 127.06, + "end": 127.14, + "confidence": 0.883 + }, + { + "text": "me,", + "start": 127.14, + "end": 127.54, + "confidence": 0.962 + }, + { + "text": "dude?", + "start": 127.54, + "end": 127.58, + "confidence": 0.969 + } + ] + }, + { + "id": 58, + "seek": 11000, + "start": 127.58, + "end": 129.28, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.895, + "words": [ + { + "text": "What", + "start": 127.58, + "end": 128.98, + "confidence": 0.719 + }, + { + "text": "are", + "start": 128.98, + "end": 129.02, + "confidence": 0.859 + }, + { + "text": "you", + "start": 129.02, + "end": 129.06, + "confidence": 0.992 + }, + { + "text": "telling", + "start": 129.06, + "end": 129.1, + "confidence": 0.896 + }, + { + "text": "me,", + "start": 129.1, + "end": 129.14, + "confidence": 0.965 + }, + { + "text": "dude?", + "start": 129.14, + "end": 129.28, + "confidence": 0.971 + } + ] + }, + { + "id": 59, + "seek": 11000, + "start": 129.72, + "end": 131.68, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.89, + "words": [ + { + "text": "What", + "start": 129.72, + "end": 131.02, + "confidence": 0.703 + }, + { + "text": "are", + 
"start": 131.02, + "end": 131.46, + "confidence": 0.855 + }, + { + "text": "you", + "start": 131.46, + "end": 131.5, + "confidence": 0.992 + }, + { + "text": "telling", + "start": 131.5, + "end": 131.54, + "confidence": 0.89 + }, + { + "text": "me,", + "start": 131.54, + "end": 131.64, + "confidence": 0.964 + }, + { + "text": "dude?", + "start": 131.64, + "end": 131.68, + "confidence": 0.97 + } + ] + }, + { + "id": 60, + "seek": 11000, + "start": 131.68, + "end": 133.0, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.903, + "words": [ + { + "text": "What", + "start": 131.68, + "end": 132.36, + "confidence": 0.746 + }, + { + "text": "are", + "start": 132.36, + "end": 132.4, + "confidence": 0.86 + }, + { + "text": "you", + "start": 132.4, + "end": 132.44, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 132.44, + "end": 132.48, + "confidence": 0.903 + }, + { + "text": "me,", + "start": 132.48, + "end": 132.52, + "confidence": 0.969 + }, + { + "text": "dude?", + "start": 132.52, + "end": 133.0, + "confidence": 0.972 + } + ] + }, + { + "id": 61, + "seek": 11000, + "start": 133.72, + "end": 135.54, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.891, + "words": [ + { + "text": "What", + "start": 133.72, + "end": 134.08, + "confidence": 0.692 + }, + { + "text": "are", + "start": 134.08, + "end": 134.14, + "confidence": 0.859 + }, + { + "text": "you", + "start": 134.14, + "end": 134.38, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 134.38, + "end": 134.42, + "confidence": 0.9 + }, + 
{ + "text": "me,", + "start": 134.42, + "end": 135.5, + "confidence": 0.971 + }, + { + "text": "dude?", + "start": 135.5, + "end": 135.54, + "confidence": 0.974 + } + ] + }, + { + "id": 62, + "seek": 11000, + "start": 135.54, + "end": 136.88, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.855, + "words": [ + { + "text": "What", + "start": 135.54, + "end": 135.58, + "confidence": 0.534 + }, + { + "text": "are", + "start": 135.58, + "end": 135.62, + "confidence": 0.856 + }, + { + "text": "you", + "start": 135.62, + "end": 135.66, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 135.66, + "end": 135.7, + "confidence": 0.906 + }, + { + "text": "me,", + "start": 135.7, + "end": 135.74, + "confidence": 0.972 + }, + { + "text": "dude?", + "start": 135.74, + "end": 136.88, + "confidence": 0.975 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto.cpu/smartphone.mp3.words.json b/tests/expected/tiny_auto.cpu/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f90643caf0519b49ba0cbc0381cd8d8ced1f218b --- /dev/null +++ b/tests/expected/tiny_auto.cpu/smartphone.mp3.words.json @@ -0,0 +1,5038 @@ +{ + "text": " C'est évidence que dit Nicolas. Mais je me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière dans quelques interagues entraîne. Et il est d'ailleurs, c'est la photo c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces les grand-attêtes qu'il a été beaucoup très souvent ementionné. Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs nous ont appris à piquer sur des icônes. 
C'est ce que le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible. Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté tout flu de la navigation web pour aller directement en but. Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas, dit qu'il est très fondablement inédit dans l'histoire de l'humanité. Mais ça s'assoulait d'une autre interrogation. Est-ce que le fait que cette objet soit inédit un d'huies que notre rapport a lui est aussi un rapport inédit? Je veux dire, est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone? Il n'y a pas d'équivalent. On s'est espécie de nous voter dans la relation à l'objet. C'est facilement éterréciant parce qu'on a impression de, comme le 10, les utilisateurs et les efforts, elles aident dépendant de cette objet d'un lieu, en fait, une espèce de relation de médiation avec le monde qui rendent un peu avec la même sédiforme de le jeu. Donc, à objets inédits, rapport inédits. Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépenses et de rojets. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objectes techniques et de leur infération dans le vie pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment. Pour autant, je sache. Il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même mort. On peut adorer sa bagnure, en avoir besoin pour plein de choses. Et là, le soir, quand on va se coucher, on la laisse. On l'a pas dans la main, quand on est collis, quand on n'en mène pas au chiot. On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. 
Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui continuuellement avec son smartphone dans la main, comme c'était une sorte de estimateur extère de l'intempis de lâcher à l'éantrénée, ça m'a eu immédiate. Bon, je dis ça pour le mome, mais évidemment, va là pour nos aussi. Donc, rapport immédiate d'accord. Mais pourquoi, à ton impression qu'on en sortira jamais? Et puis, il faut en remettre la faute sur les gens qui ont créé cette route merveilleux et diabolique, qui a dit à bollique par coeur, merveilleux. Les économistes parlent de dépendance du santé. Ces vidéos, en fait, on est un santé qui a été établie, c'est un soit mon termine, soit définissant des beurs, on définisse un signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 1.38, + "text": " C'est évidence que dit Nicolas.", + "tokens": [ + 383, + 6, + 377, + 20090, + 2778, + 631, + 6176, + 38268, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.625, + "words": [ + { + "text": "C'est", + "start": 0.42, + "end": 0.66, + "confidence": 0.849 + }, + { + "text": "évidence", + "start": 0.66, + "end": 0.94, + "confidence": 0.368 + }, + { + "text": "que", + "start": 0.94, + "end": 1.06, + "confidence": 0.883 + }, + { + "text": "dit", + "start": 1.06, + "end": 1.16, + "confidence": 0.344 + }, + { + "text": "Nicolas.", + "start": 1.16, + "end": 1.38, + "confidence": 0.921 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.66, + "end": 3.62, + "text": " Mais je me l'étais jamais formulé comme ça.", + "tokens": [ + 6313, + 1506, + 385, + 287, + 6, + 22824, + 14540, + 1254, + 425, + 526, + 5173, + 2788, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.745, + "words": [ + { + "text": "Mais", + "start": 1.66, + "end": 
1.9, + "confidence": 0.956 + }, + { + "text": "je", + "start": 1.9, + "end": 2.18, + "confidence": 0.629 + }, + { + "text": "me", + "start": 2.18, + "end": 2.3, + "confidence": 0.943 + }, + { + "text": "l'étais", + "start": 2.3, + "end": 2.54, + "confidence": 0.708 + }, + { + "text": "jamais", + "start": 2.54, + "end": 2.78, + "confidence": 0.962 + }, + { + "text": "formulé", + "start": 2.78, + "end": 3.2, + "confidence": 0.541 + }, + { + "text": "comme", + "start": 3.2, + "end": 3.34, + "confidence": 0.975 + }, + { + "text": "ça.", + "start": 3.34, + "end": 3.62, + "confidence": 0.979 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 4.14, + "end": 8.82, + "text": " Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière", + "tokens": [ + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 635, + 12713, + 2776, + 730, + 17290, + 3916, + 11, + 2420, + 635, + 22267 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.762, + "words": [ + { + "text": "Ce", + "start": 4.14, + "end": 4.22, + "confidence": 0.39 + }, + { + "text": "qui", + "start": 4.22, + "end": 4.34, + "confidence": 0.934 + }, + { + "text": "fait", + "start": 4.34, + "end": 4.46, + "confidence": 0.735 + }, + { + "text": "la", + "start": 4.46, + "end": 4.56, + "confidence": 0.988 + }, + { + "text": "force", + "start": 4.56, + "end": 4.96, + "confidence": 0.933 + }, + { + "text": "du", + "start": 4.96, + "end": 5.16, + "confidence": 0.936 + }, + { + "text": "smartphone,", + "start": 5.16, + "end": 5.74, + "confidence": 0.909 + }, + { + "text": "c'est", + "start": 5.74, + "end": 6.12, + "confidence": 0.871 + }, + { + "text": "pas", + "start": 6.12, + "end": 6.2, + "confidence": 0.982 + }, + { + "text": "seulement", + "start": 6.2, + "end": 6.52, + "confidence": 0.991 + }, + { + "text": "la", + "start": 
6.52, + "end": 6.76, + "confidence": 0.627 + }, + { + "text": "cumulation", + "start": 6.76, + "end": 7.18, + "confidence": 0.679 + }, + { + "text": "des", + "start": 7.18, + "end": 7.54, + "confidence": 0.752 + }, + { + "text": "fonctions,", + "start": 7.54, + "end": 8.1, + "confidence": 0.826 + }, + { + "text": "mais", + "start": 8.1, + "end": 8.42, + "confidence": 0.511 + }, + { + "text": "la", + "start": 8.42, + "end": 8.58, + "confidence": 0.717 + }, + { + "text": "manière", + "start": 8.58, + "end": 8.82, + "confidence": 0.457 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 8.9, + "end": 10.84, + "text": " dans quelques interagues entraîne.", + "tokens": [ + 2680, + 16597, + 728, + 559, + 1247, + 22284, + 24741, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.3, + "words": [ + { + "text": "dans", + "start": 8.9, + "end": 9.04, + "confidence": 0.329 + }, + { + "text": "quelques", + "start": 9.04, + "end": 9.26, + "confidence": 0.282 + }, + { + "text": "interagues", + "start": 9.26, + "end": 10.18, + "confidence": 0.238 + }, + { + "text": "entraîne.", + "start": 10.18, + "end": 10.84, + "confidence": 0.421 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 11.04, + "end": 12.92, + "text": " Et il est d'ailleurs, c'est la photo c'est hyper convaincant.", + "tokens": [ + 3790, + 1930, + 871, + 274, + 6, + 19400, + 11, + 269, + 6, + 377, + 635, + 5052, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.625, + "words": [ + { + "text": "Et", + "start": 11.04, + "end": 11.1, + "confidence": 0.394 + }, + { + "text": "il", + "start": 11.1, + "end": 11.24, + "confidence": 0.128 + }, + { + "text": "est", + "start": 11.24, + "end": 11.38, + "confidence": 0.237 + }, + { + 
"text": "d'ailleurs,", + "start": 11.38, + "end": 11.62, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 11.62, + "end": 11.78, + "confidence": 0.879 + }, + { + "text": "la", + "start": 11.78, + "end": 11.82, + "confidence": 0.968 + }, + { + "text": "photo", + "start": 11.82, + "end": 11.92, + "confidence": 0.811 + }, + { + "text": "c'est", + "start": 11.92, + "end": 12.18, + "confidence": 0.792 + }, + { + "text": "hyper", + "start": 12.18, + "end": 12.38, + "confidence": 0.939 + }, + { + "text": "convaincant.", + "start": 12.38, + "end": 12.92, + "confidence": 0.494 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 13.26, + "end": 18.03, + "text": " Alors évidemment, il faudrait ajouter les interfaces les grand-attêtes qu'il a été beaucoup", + "tokens": [ + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 1512, + 2697, + 12, + 1591, + 38262, + 421, + 6, + 388, + 257, + 8862, + 8796 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.553, + "words": [ + { + "text": "Alors", + "start": 13.26, + "end": 13.48, + "confidence": 0.904 + }, + { + "text": "évidemment,", + "start": 13.48, + "end": 13.92, + "confidence": 0.785 + }, + { + "text": "il", + "start": 13.92, + "end": 14.4, + "confidence": 0.961 + }, + { + "text": "faudrait", + "start": 14.4, + "end": 14.76, + "confidence": 0.85 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.38, + "confidence": 0.874 + }, + { + "text": "les", + "start": 15.38, + "end": 15.62, + "confidence": 0.933 + }, + { + "text": "interfaces", + "start": 15.62, + "end": 15.86, + "confidence": 0.359 + }, + { + "text": "les", + "start": 15.86, + "end": 16.5, + "confidence": 0.414 + }, + { + "text": "grand-attêtes", + "start": 16.5, + "end": 16.94, + "confidence": 0.178 + }, + { + "text": "qu'il", + "start": 16.94, + "end": 17.18, + "confidence": 0.632 + }, + { + "text": "a", 
+ "start": 17.18, + "end": 17.24, + "confidence": 0.969 + }, + { + "text": "été", + "start": 17.24, + "end": 17.5, + "confidence": 0.957 + }, + { + "text": "beaucoup", + "start": 17.5, + "end": 18.03, + "confidence": 0.572 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 18.03, + "end": 19.26, + "text": " très souvent ementionné.", + "tokens": [ + 5732, + 20847, + 846, + 1251, + 15055, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.541, + "words": [ + { + "text": "très", + "start": 18.03, + "end": 18.66, + "confidence": 0.959 + }, + { + "text": "souvent", + "start": 18.66, + "end": 18.82, + "confidence": 0.994 + }, + { + "text": "ementionné.", + "start": 18.82, + "end": 19.26, + "confidence": 0.365 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.86, + "end": 23.54, + "text": " Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs", + "tokens": [ + 6313, + 4428, + 11, + 1930, + 38694, + 8645, + 631, + 1512, + 1740, + 3324, + 6212, + 368, + 945, + 1567, + 17338, + 287, + 6, + 21210, + 11, + 1512, + 4792, + 13923, + 2156 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.648, + "words": [ + { + "text": "Mais", + "start": 19.86, + "end": 20.22, + "confidence": 0.975 + }, + { + "text": "bon,", + "start": 20.22, + "end": 20.56, + "confidence": 0.479 + }, + { + "text": "il", + "start": 20.56, + "end": 20.6, + "confidence": 0.978 + }, + { + "text": "faudrait", + "start": 20.6, + "end": 20.74, + "confidence": 0.753 + }, + { + "text": "que", + "start": 20.74, + "end": 20.92, + "confidence": 0.378 + }, + { + "text": "les", + "start": 20.92, + "end": 20.98, + "confidence": 0.182 + }, + { + "text": "profites", + "start": 20.98, + "end": 21.26, + "confidence": 0.622 + }, + { + "text": "aussi", + "start": 
21.26, + "end": 21.7, + "confidence": 0.502 + }, + { + "text": "de", + "start": 21.7, + "end": 21.84, + "confidence": 0.468 + }, + { + "text": "20", + "start": 21.84, + "end": 22.08, + "confidence": 0.924 + }, + { + "text": "ans", + "start": 22.08, + "end": 22.28, + "confidence": 0.937 + }, + { + "text": "pendant", + "start": 22.28, + "end": 22.46, + "confidence": 0.903 + }, + { + "text": "l'été,", + "start": 22.46, + "end": 22.96, + "confidence": 0.499 + }, + { + "text": "les", + "start": 22.96, + "end": 23.04, + "confidence": 0.861 + }, + { + "text": "ordinateurs", + "start": 23.04, + "end": 23.54, + "confidence": 0.934 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 23.58, + "end": 25.26, + "text": " nous ont appris à piquer sur des icônes.", + "tokens": [ + 4666, + 6592, + 724, + 5714, + 1531, + 280, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.663, + "words": [ + { + "text": "nous", + "start": 23.58, + "end": 23.74, + "confidence": 0.833 + }, + { + "text": "ont", + "start": 23.74, + "end": 23.86, + "confidence": 0.976 + }, + { + "text": "appris", + "start": 23.86, + "end": 24.06, + "confidence": 0.947 + }, + { + "text": "à", + "start": 24.06, + "end": 24.24, + "confidence": 0.24 + }, + { + "text": "piquer", + "start": 24.24, + "end": 24.42, + "confidence": 0.45 + }, + { + "text": "sur", + "start": 24.42, + "end": 24.68, + "confidence": 0.749 + }, + { + "text": "des", + "start": 24.68, + "end": 24.8, + "confidence": 0.96 + }, + { + "text": "icônes.", + "start": 24.8, + "end": 25.26, + "confidence": 0.656 + } + ] + }, + { + "id": 9, + "seek": 2556, + "start": 25.58, + "end": 30.56, + "text": " C'est ce que le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible.", + "tokens": [ + 383, + 6, + 377, + 1769, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 
260, + 11, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.643, + "words": [ + { + "text": "C'est", + "start": 25.58, + "end": 25.66, + "confidence": 0.449 + }, + { + "text": "ce", + "start": 25.66, + "end": 25.72, + "confidence": 0.491 + }, + { + "text": "que", + "start": 25.72, + "end": 25.94, + "confidence": 0.935 + }, + { + "text": "le", + "start": 25.94, + "end": 26.6, + "confidence": 0.377 + }, + { + "text": "smartphone", + "start": 26.6, + "end": 26.86, + "confidence": 0.977 + }, + { + "text": "ajoute", + "start": 26.86, + "end": 27.42, + "confidence": 0.813 + }, + { + "text": "le", + "start": 27.42, + "end": 27.64, + "confidence": 0.957 + }, + { + "text": "toucher,", + "start": 27.64, + "end": 28.06, + "confidence": 0.73 + }, + { + "text": "qui", + "start": 28.06, + "end": 28.18, + "confidence": 0.209 + }, + { + "text": "rend", + "start": 28.18, + "end": 28.34, + "confidence": 0.877 + }, + { + "text": "le", + "start": 28.34, + "end": 28.68, + "confidence": 0.991 + }, + { + "text": "contact", + "start": 28.68, + "end": 28.96, + "confidence": 0.854 + }, + { + "text": "plus", + "start": 28.96, + "end": 29.48, + "confidence": 0.865 + }, + { + "text": "direct,", + "start": 29.48, + "end": 29.96, + "confidence": 0.692 + }, + { + "text": "plus", + "start": 29.96, + "end": 30.24, + "confidence": 0.928 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.56, + "confidence": 0.332 + } + ] + }, + { + "id": 10, + "seek": 2556, + "start": 31.04, + "end": 34.34, + "text": " Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner", + "tokens": [ + 3790, + 9093, + 11, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193 + ], + "temperature": 0.0, + "avg_logprob": 
-0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.827, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.24, + "confidence": 0.914 + }, + { + "text": "puis,", + "start": 31.24, + "end": 31.34, + "confidence": 0.713 + }, + { + "text": "évidemment,", + "start": 31.34, + "end": 31.64, + "confidence": 0.378 + }, + { + "text": "il", + "start": 31.64, + "end": 31.78, + "confidence": 0.935 + }, + { + "text": "faudrait", + "start": 31.78, + "end": 31.88, + "confidence": 0.99 + }, + { + "text": "parler", + "start": 31.88, + "end": 32.12, + "confidence": 0.882 + }, + { + "text": "aussi", + "start": 32.12, + "end": 32.3, + "confidence": 0.889 + }, + { + "text": "des", + "start": 32.3, + "end": 32.44, + "confidence": 0.927 + }, + { + "text": "applications", + "start": 32.44, + "end": 32.78, + "confidence": 0.839 + }, + { + "text": "qui", + "start": 32.78, + "end": 33.16, + "confidence": 0.652 + }, + { + "text": "permettent", + "start": 33.16, + "end": 33.68, + "confidence": 0.951 + }, + { + "text": "de", + "start": 33.68, + "end": 33.9, + "confidence": 0.952 + }, + { + "text": "contourner", + "start": 33.9, + "end": 34.34, + "confidence": 0.787 + } + ] + }, + { + "id": 11, + "seek": 2556, + "start": 34.34, + "end": 37.72, + "text": " le côté tout flu de la navigation web pour aller directement en but.", + "tokens": [ + 476, + 18437, + 3486, + 5029, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 465, + 457, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.766, + "words": [ + { + "text": "le", + "start": 34.34, + "end": 34.52, + "confidence": 0.989 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.72, + "confidence": 0.983 + }, + { + "text": "tout", + "start": 34.72, + "end": 34.96, + "confidence": 0.954 + }, + { + "text": "flu", + "start": 34.96, + 
"end": 35.2, + "confidence": 0.548 + }, + { + "text": "de", + "start": 35.2, + "end": 35.42, + "confidence": 0.248 + }, + { + "text": "la", + "start": 35.42, + "end": 35.72, + "confidence": 0.902 + }, + { + "text": "navigation", + "start": 35.72, + "end": 36.04, + "confidence": 0.913 + }, + { + "text": "web", + "start": 36.04, + "end": 36.64, + "confidence": 0.93 + }, + { + "text": "pour", + "start": 36.64, + "end": 36.76, + "confidence": 0.746 + }, + { + "text": "aller", + "start": 36.76, + "end": 36.94, + "confidence": 0.992 + }, + { + "text": "directement", + "start": 36.94, + "end": 37.46, + "confidence": 0.986 + }, + { + "text": "en", + "start": 37.46, + "end": 37.68, + "confidence": 0.644 + }, + { + "text": "but.", + "start": 37.68, + "end": 37.72, + "confidence": 0.689 + } + ] + }, + { + "id": 12, + "seek": 2556, + "start": 37.72, + "end": 43.06, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas,", + "tokens": [ + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 5550, + 14964, + 11, + 465, + 38268, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.725, + "words": [ + { + "text": "Bref,", + "start": 37.72, + "end": 38.9, + "confidence": 0.967 + }, + { + "text": "tout", + "start": 38.9, + "end": 38.98, + "confidence": 0.786 + }, + { + "text": "ça,", + "start": 38.98, + "end": 39.36, + "confidence": 0.985 + }, + { + "text": "ce", + "start": 39.36, + "end": 39.68, + "confidence": 0.929 + }, + { + "text": "sont", + "start": 39.68, + "end": 39.84, + "confidence": 0.963 + }, + { + "text": "les", + "start": 39.84, + "end": 40.06, + "confidence": 0.976 + }, + { + "text": "conditions", + "start": 40.06, + "end": 40.54, + "confidence": 0.96 + }, + { + "text": "qui", + "start": 40.54, + "end": 40.9, + "confidence": 0.995 + }, + { + "text": 
"permettent", + "start": 40.9, + "end": 41.54, + "confidence": 0.986 + }, + { + "text": "de", + "start": 41.54, + "end": 41.58, + "confidence": 0.989 + }, + { + "text": "créer", + "start": 41.58, + "end": 41.92, + "confidence": 0.918 + }, + { + "text": "cette", + "start": 41.92, + "end": 42.3, + "confidence": 0.518 + }, + { + "text": "objet,", + "start": 42.3, + "end": 42.7, + "confidence": 0.213 + }, + { + "text": "en", + "start": 42.7, + "end": 42.8, + "confidence": 0.191 + }, + { + "text": "Nicolas,", + "start": 42.8, + "end": 43.06, + "confidence": 0.508 + } + ] + }, + { + "id": 13, + "seek": 2556, + "start": 43.1, + "end": 46.48, + "text": " dit qu'il est très fondablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 6176, + 421, + 6, + 388, + 871, + 5732, + 9557, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.768, + "words": [ + { + "text": "dit", + "start": 43.1, + "end": 43.52, + "confidence": 0.551 + }, + { + "text": "qu'il", + "start": 43.52, + "end": 43.68, + "confidence": 0.969 + }, + { + "text": "est", + "start": 43.68, + "end": 43.8, + "confidence": 0.851 + }, + { + "text": "très", + "start": 43.8, + "end": 43.94, + "confidence": 0.368 + }, + { + "text": "fondablement", + "start": 43.94, + "end": 44.68, + "confidence": 0.593 + }, + { + "text": "inédit", + "start": 44.68, + "end": 45.52, + "confidence": 0.66 + }, + { + "text": "dans", + "start": 45.52, + "end": 45.74, + "confidence": 0.778 + }, + { + "text": "l'histoire", + "start": 45.74, + "end": 45.98, + "confidence": 0.825 + }, + { + "text": "de", + "start": 45.98, + "end": 46.08, + "confidence": 0.978 + }, + { + "text": "l'humanité.", + "start": 46.08, + "end": 46.48, + "confidence": 0.991 + } + ] + }, + { + "id": 14, + "seek": 2556, + "start": 47.06, + "end": 
48.76, + "text": " Mais ça s'assoulait d'une autre interrogation.", + "tokens": [ + 6313, + 2788, + 262, + 6, + 640, + 263, + 35235, + 274, + 6, + 2613, + 15081, + 24871, + 399, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.697, + "words": [ + { + "text": "Mais", + "start": 47.06, + "end": 47.16, + "confidence": 0.939 + }, + { + "text": "ça", + "start": 47.16, + "end": 47.46, + "confidence": 0.854 + }, + { + "text": "s'assoulait", + "start": 47.46, + "end": 47.78, + "confidence": 0.591 + }, + { + "text": "d'une", + "start": 47.78, + "end": 48.02, + "confidence": 0.619 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.14, + "confidence": 0.966 + }, + { + "text": "interrogation.", + "start": 48.14, + "end": 48.76, + "confidence": 0.833 + } + ] + }, + { + "id": 15, + "seek": 2556, + "start": 49.26, + "end": 54.34, + "text": " Est-ce que le fait que cette objet soit inédit un d'huies que notre rapport a lui est aussi", + "tokens": [ + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 5550, + 14964, + 12703, + 294, + 7811, + 270, + 517, + 274, + 6, + 12086, + 530, + 631, + 10349, + 18018, + 257, + 8783, + 871, + 6212 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.697, + "words": [ + { + "text": "Est-ce", + "start": 49.26, + "end": 49.7, + "confidence": 0.804 + }, + { + "text": "que", + "start": 49.7, + "end": 49.74, + "confidence": 0.989 + }, + { + "text": "le", + "start": 49.74, + "end": 49.78, + "confidence": 0.882 + }, + { + "text": "fait", + "start": 49.78, + "end": 49.96, + "confidence": 0.962 + }, + { + "text": "que", + "start": 49.96, + "end": 50.08, + "confidence": 0.935 + }, + { + "text": "cette", + "start": 50.08, + "end": 50.24, + "confidence": 0.95 + }, + { + "text": "objet", + "start": 50.24, + "end": 
50.5, + "confidence": 0.968 + }, + { + "text": "soit", + "start": 50.5, + "end": 50.96, + "confidence": 0.991 + }, + { + "text": "inédit", + "start": 50.96, + "end": 51.78, + "confidence": 0.91 + }, + { + "text": "un", + "start": 51.78, + "end": 52.06, + "confidence": 0.442 + }, + { + "text": "d'huies", + "start": 52.06, + "end": 52.32, + "confidence": 0.298 + }, + { + "text": "que", + "start": 52.32, + "end": 52.4, + "confidence": 0.966 + }, + { + "text": "notre", + "start": 52.4, + "end": 52.58, + "confidence": 0.992 + }, + { + "text": "rapport", + "start": 52.58, + "end": 53.1, + "confidence": 0.714 + }, + { + "text": "a", + "start": 53.1, + "end": 53.44, + "confidence": 0.522 + }, + { + "text": "lui", + "start": 53.44, + "end": 53.62, + "confidence": 0.659 + }, + { + "text": "est", + "start": 53.62, + "end": 54.0, + "confidence": 0.643 + }, + { + "text": "aussi", + "start": 54.0, + "end": 54.34, + "confidence": 0.808 + } + ] + }, + { + "id": 16, + "seek": 2556, + "start": 54.34, + "end": 55.32, + "text": " un rapport inédit?", + "tokens": [ + 517, + 18018, + 294, + 7811, + 270, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.849, + "words": [ + { + "text": "un", + "start": 54.34, + "end": 54.72, + "confidence": 0.506 + }, + { + "text": "rapport", + "start": 54.72, + "end": 54.94, + "confidence": 0.999 + }, + { + "text": "inédit?", + "start": 54.94, + "end": 55.32, + "confidence": 0.956 + } + ] + }, + { + "id": 17, + "seek": 5548, + "start": 55.6, + "end": 58.6, + "text": " Je veux dire, est-ce que le rapport qu'on a au sein de foi n'est comparable à celui", + "tokens": [ + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 6195, + 368, + 6901, + 297, + 6, + 377, + 6311, + 712, + 1531, + 22829 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 
1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.626, + "words": [ + { + "text": "Je", + "start": 55.6, + "end": 55.84, + "confidence": 0.335 + }, + { + "text": "veux", + "start": 55.84, + "end": 55.94, + "confidence": 0.432 + }, + { + "text": "dire,", + "start": 55.94, + "end": 56.16, + "confidence": 0.992 + }, + { + "text": "est-ce", + "start": 56.16, + "end": 56.34, + "confidence": 0.951 + }, + { + "text": "que", + "start": 56.34, + "end": 56.4, + "confidence": 0.973 + }, + { + "text": "le", + "start": 56.4, + "end": 56.54, + "confidence": 0.987 + }, + { + "text": "rapport", + "start": 56.54, + "end": 56.76, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 56.76, + "end": 57.1, + "confidence": 0.906 + }, + { + "text": "a", + "start": 57.1, + "end": 57.14, + "confidence": 0.974 + }, + { + "text": "au", + "start": 57.14, + "end": 57.26, + "confidence": 0.319 + }, + { + "text": "sein", + "start": 57.26, + "end": 57.36, + "confidence": 0.244 + }, + { + "text": "de", + "start": 57.36, + "end": 57.44, + "confidence": 0.177 + }, + { + "text": "foi", + "start": 57.44, + "end": 57.54, + "confidence": 0.163 + }, + { + "text": "n'est", + "start": 57.54, + "end": 57.78, + "confidence": 0.777 + }, + { + "text": "comparable", + "start": 57.78, + "end": 58.32, + "confidence": 0.606 + }, + { + "text": "à", + "start": 58.32, + "end": 58.46, + "confidence": 0.482 + }, + { + "text": "celui", + "start": 58.46, + "end": 58.6, + "confidence": 0.831 + } + ] + }, + { + "id": 18, + "seek": 5548, + "start": 58.72, + "end": 62.81, + "text": " qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone?", + "tokens": [ + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 2657, + 279, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + 
"confidence": 0.781, + "words": [ + { + "text": "qu'on", + "start": 58.72, + "end": 58.88, + "confidence": 0.941 + }, + { + "text": "entretenait", + "start": 58.88, + "end": 59.38, + "confidence": 0.657 + }, + { + "text": "à", + "start": 59.38, + "end": 59.44, + "confidence": 0.973 + }, + { + "text": "d'autres", + "start": 59.44, + "end": 59.64, + "confidence": 0.909 + }, + { + "text": "objectes", + "start": 59.64, + "end": 60.02, + "confidence": 0.528 + }, + { + "text": "techniques", + "start": 60.02, + "end": 60.38, + "confidence": 0.547 + }, + { + "text": "comme", + "start": 60.38, + "end": 60.86, + "confidence": 0.662 + }, + { + "text": "la", + "start": 60.86, + "end": 61.46, + "confidence": 0.898 + }, + { + "text": "voiture", + "start": 61.46, + "end": 61.8, + "confidence": 0.954 + }, + { + "text": "ou", + "start": 61.8, + "end": 62.32, + "confidence": 0.74 + }, + { + "text": "le", + "start": 62.32, + "end": 62.6, + "confidence": 0.847 + }, + { + "text": "téléphone?", + "start": 62.6, + "end": 62.81, + "confidence": 0.979 + } + ] + }, + { + "id": 19, + "seek": 5548, + "start": 62.81, + "end": 66.07, + "text": " Il n'y a pas d'équivalent.", + "tokens": [ + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.882, + "words": [ + { + "text": "Il", + "start": 62.81, + "end": 65.42, + "confidence": 0.866 + }, + { + "text": "n'y", + "start": 65.42, + "end": 65.52, + "confidence": 0.865 + }, + { + "text": "a", + "start": 65.52, + "end": 65.56, + "confidence": 0.965 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.62, + "confidence": 0.998 + }, + { + "text": "d'équivalent.", + "start": 65.62, + "end": 66.07, + "confidence": 0.858 + } + ] + }, + { + "id": 20, + "seek": 5548, + "start": 66.07, + "end": 69.74, + "text": " On s'est espécie de nous voter dans la relation à 
l'objet.", + "tokens": [ + 1282, + 262, + 6, + 377, + 7089, + 526, + 4260, + 368, + 4666, + 21722, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.552, + "words": [ + { + "text": "On", + "start": 66.07, + "end": 67.0, + "confidence": 0.29 + }, + { + "text": "s'est", + "start": 67.0, + "end": 67.26, + "confidence": 0.562 + }, + { + "text": "espécie", + "start": 67.26, + "end": 67.46, + "confidence": 0.233 + }, + { + "text": "de", + "start": 67.46, + "end": 67.58, + "confidence": 0.963 + }, + { + "text": "nous", + "start": 67.58, + "end": 67.8, + "confidence": 0.631 + }, + { + "text": "voter", + "start": 67.8, + "end": 68.24, + "confidence": 0.241 + }, + { + "text": "dans", + "start": 68.24, + "end": 68.68, + "confidence": 0.853 + }, + { + "text": "la", + "start": 68.68, + "end": 68.88, + "confidence": 0.61 + }, + { + "text": "relation", + "start": 68.88, + "end": 69.22, + "confidence": 0.932 + }, + { + "text": "à", + "start": 69.22, + "end": 69.34, + "confidence": 0.792 + }, + { + "text": "l'objet.", + "start": 69.34, + "end": 69.74, + "confidence": 0.889 + } + ] + }, + { + "id": 21, + "seek": 5548, + "start": 70.18, + "end": 74.78, + "text": " C'est facilement éterréciant parce qu'on a impression de, comme le 10, les utilisateurs", + "tokens": [ + 383, + 6, + 377, + 23670, + 518, + 1136, + 391, + 10521, + 537, + 394, + 6992, + 421, + 6, + 266, + 257, + 9995, + 368, + 11, + 5173, + 476, + 1266, + 11, + 1512, + 33643, + 25929 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.491, + "words": [ + { + "text": "C'est", + "start": 70.18, + "end": 70.34, + "confidence": 0.907 + }, + { + "text": "facilement", + "start": 70.34, + "end": 70.72, + "confidence": 0.527 + }, + { + 
"text": "éterréciant", + "start": 70.72, + "end": 71.56, + "confidence": 0.277 + }, + { + "text": "parce", + "start": 71.56, + "end": 71.82, + "confidence": 0.298 + }, + { + "text": "qu'on", + "start": 71.82, + "end": 72.3, + "confidence": 0.775 + }, + { + "text": "a", + "start": 72.3, + "end": 72.4, + "confidence": 0.563 + }, + { + "text": "impression", + "start": 72.4, + "end": 72.74, + "confidence": 0.148 + }, + { + "text": "de,", + "start": 72.74, + "end": 73.36, + "confidence": 0.211 + }, + { + "text": "comme", + "start": 73.36, + "end": 73.8, + "confidence": 0.843 + }, + { + "text": "le", + "start": 73.8, + "end": 73.96, + "confidence": 0.9 + }, + { + "text": "10,", + "start": 73.96, + "end": 74.28, + "confidence": 0.395 + }, + { + "text": "les", + "start": 74.28, + "end": 74.32, + "confidence": 0.581 + }, + { + "text": "utilisateurs", + "start": 74.32, + "end": 74.78, + "confidence": 0.745 + } + ] + }, + { + "id": 22, + "seek": 5548, + "start": 74.8, + "end": 77.93, + "text": " et les efforts, elles aident dépendant de cette objet d'un lieu, en fait, une espèce de", + "tokens": [ + 1030, + 1512, + 6484, + 11, + 23576, + 257, + 1078, + 45768, + 394, + 368, + 5550, + 14964, + 274, + 6, + 409, + 26036, + 11, + 465, + 3887, + 11, + 2251, + 7089, + 30236, + 368 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.464, + "words": [ + { + "text": "et", + "start": 74.8, + "end": 74.92, + "confidence": 0.147 + }, + { + "text": "les", + "start": 74.92, + "end": 74.96, + "confidence": 0.242 + }, + { + "text": "efforts,", + "start": 74.96, + "end": 75.22, + "confidence": 0.108 + }, + { + "text": "elles", + "start": 75.22, + "end": 75.32, + "confidence": 0.108 + }, + { + "text": "aident", + "start": 75.32, + "end": 75.44, + "confidence": 0.289 + }, + { + "text": "dépendant", + "start": 75.44, + "end": 76.0, + "confidence": 0.576 + }, + { + "text": "de", + 
"start": 76.0, + "end": 76.16, + "confidence": 0.301 + }, + { + "text": "cette", + "start": 76.16, + "end": 76.2, + "confidence": 0.519 + }, + { + "text": "objet", + "start": 76.2, + "end": 76.48, + "confidence": 0.961 + }, + { + "text": "d'un", + "start": 76.48, + "end": 76.86, + "confidence": 0.883 + }, + { + "text": "lieu,", + "start": 76.86, + "end": 77.06, + "confidence": 0.333 + }, + { + "text": "en", + "start": 77.06, + "end": 77.18, + "confidence": 0.837 + }, + { + "text": "fait,", + "start": 77.18, + "end": 77.42, + "confidence": 0.958 + }, + { + "text": "une", + "start": 77.42, + "end": 77.46, + "confidence": 0.57 + }, + { + "text": "espèce", + "start": 77.46, + "end": 77.74, + "confidence": 0.961 + }, + { + "text": "de", + "start": 77.74, + "end": 77.93, + "confidence": 0.601 + } + ] + }, + { + "id": 23, + "seek": 5548, + "start": 77.93, + "end": 82.98, + "text": " relation de médiation avec le monde qui rendent un peu avec la même sédiforme de", + "tokens": [ + 9721, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 317, + 517, + 5604, + 4163, + 635, + 5698, + 262, + 7811, + 8629, + 68, + 368 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.556, + "words": [ + { + "text": "relation", + "start": 77.93, + "end": 78.32, + "confidence": 0.839 + }, + { + "text": "de", + "start": 78.32, + "end": 78.7, + "confidence": 0.723 + }, + { + "text": "médiation", + "start": 78.7, + "end": 79.4, + "confidence": 0.872 + }, + { + "text": "avec", + "start": 79.4, + "end": 79.74, + "confidence": 0.964 + }, + { + "text": "le", + "start": 79.74, + "end": 79.88, + "confidence": 0.985 + }, + { + "text": "monde", + "start": 79.88, + "end": 80.24, + "confidence": 0.909 + }, + { + "text": "qui", + "start": 80.24, + "end": 81.02, + "confidence": 0.871 + }, + { + "text": "rendent", + "start": 81.02, + "end": 81.74, + "confidence": 0.623 + }, + { + 
"text": "un", + "start": 81.74, + "end": 81.84, + "confidence": 0.224 + }, + { + "text": "peu", + "start": 81.84, + "end": 81.88, + "confidence": 0.223 + }, + { + "text": "avec", + "start": 81.88, + "end": 82.1, + "confidence": 0.863 + }, + { + "text": "la", + "start": 82.1, + "end": 82.24, + "confidence": 0.565 + }, + { + "text": "même", + "start": 82.24, + "end": 82.32, + "confidence": 0.384 + }, + { + "text": "sédiforme", + "start": 82.32, + "end": 82.86, + "confidence": 0.29 + }, + { + "text": "de", + "start": 82.86, + "end": 82.98, + "confidence": 0.899 + } + ] + }, + { + "id": 24, + "seek": 8298, + "start": 83.0, + "end": 87.66, + "text": " le jeu. Donc, à objets inédits, rapport inédits.", + "tokens": [ + 476, + 16748, + 13, + 7477, + 11, + 1531, + 1111, + 25349, + 294, + 7811, + 1208, + 11, + 18018, + 294, + 7811, + 1208, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.614, + "words": [ + { + "text": "le", + "start": 83.0, + "end": 83.12, + "confidence": 0.128 + }, + { + "text": "jeu.", + "start": 83.12, + "end": 83.64, + "confidence": 0.505 + }, + { + "text": "Donc,", + "start": 83.64, + "end": 84.48, + "confidence": 0.513 + }, + { + "text": "à", + "start": 84.48, + "end": 84.86, + "confidence": 0.634 + }, + { + "text": "objets", + "start": 84.86, + "end": 85.36, + "confidence": 0.547 + }, + { + "text": "inédits,", + "start": 85.36, + "end": 86.24, + "confidence": 0.73 + }, + { + "text": "rapport", + "start": 86.24, + "end": 86.62, + "confidence": 0.928 + }, + { + "text": "inédits.", + "start": 86.62, + "end": 87.66, + "confidence": 0.921 + } + ] + }, + { + "id": 25, + "seek": 8298, + "start": 88.08, + "end": 93.78, + "text": " Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépenses", + "tokens": [ + 3790, + 11, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 46750, + 38268, + 11, + 431, + 4212, + 
1032, + 578, + 4198, + 50027, + 971, + 517, + 41953, + 933, + 368, + 27998, + 9085 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.624, + "words": [ + { + "text": "Et,", + "start": 88.08, + "end": 88.3, + "confidence": 0.87 + }, + { + "text": "ce", + "start": 88.3, + "end": 88.8, + "confidence": 0.462 + }, + { + "text": "rapport,", + "start": 88.8, + "end": 89.28, + "confidence": 0.997 + }, + { + "text": "si", + "start": 89.28, + "end": 89.56, + "confidence": 0.913 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.74, + "confidence": 0.771 + }, + { + "text": "prends", + "start": 89.74, + "end": 89.84, + "confidence": 0.313 + }, + { + "text": "Nicolas,", + "start": 89.84, + "end": 90.54, + "confidence": 0.358 + }, + { + "text": "frère", + "start": 90.54, + "end": 91.06, + "confidence": 0.405 + }, + { + "text": "caractérisée", + "start": 91.06, + "end": 91.7, + "confidence": 0.567 + }, + { + "text": "par", + "start": 91.7, + "end": 92.12, + "confidence": 0.868 + }, + { + "text": "un", + "start": 92.12, + "end": 92.32, + "confidence": 0.989 + }, + { + "text": "mélange", + "start": 92.32, + "end": 92.96, + "confidence": 0.932 + }, + { + "text": "de", + "start": 92.96, + "end": 93.24, + "confidence": 0.812 + }, + { + "text": "dépenses", + "start": 93.24, + "end": 93.78, + "confidence": 0.404 + } + ] + }, + { + "id": 26, + "seek": 8298, + "start": 94.36, + "end": 94.98, + "text": " et de rojets.", + "tokens": [ + 1030, + 368, + 744, + 73, + 1385, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.583, + "words": [ + { + "text": "et", + "start": 94.36, + "end": 94.52, + "confidence": 0.986 + }, + { + "text": "de", + "start": 94.52, + "end": 94.56, + "confidence": 0.996 + }, + { + "text": "rojets.", + "start": 94.56, + "end": 
94.98, + "confidence": 0.41 + } + ] + }, + { + "id": 27, + "seek": 8298, + "start": 95.8, + "end": 100.4, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objectes", + "tokens": [ + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 2657, + 279 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.749, + "words": [ + { + "text": "Bon,", + "start": 95.8, + "end": 96.06, + "confidence": 0.71 + }, + { + "text": "en", + "start": 96.06, + "end": 96.54, + "confidence": 0.92 + }, + { + "text": "vrai,", + "start": 96.54, + "end": 96.88, + "confidence": 0.994 + }, + { + "text": "il", + "start": 96.88, + "end": 97.1, + "confidence": 0.985 + }, + { + "text": "faudrait", + "start": 97.1, + "end": 97.5, + "confidence": 0.932 + }, + { + "text": "remonter", + "start": 97.5, + "end": 98.02, + "confidence": 0.574 + }, + { + "text": "très", + "start": 98.02, + "end": 98.46, + "confidence": 0.984 + }, + { + "text": "très", + "start": 98.46, + "end": 98.76, + "confidence": 0.597 + }, + { + "text": "finement", + "start": 98.76, + "end": 99.36, + "confidence": 0.476 + }, + { + "text": "toute", + "start": 99.36, + "end": 99.68, + "confidence": 0.367 + }, + { + "text": "l'histoire", + "start": 99.68, + "end": 100.02, + "confidence": 0.909 + }, + { + "text": "des", + "start": 100.02, + "end": 100.2, + "confidence": 0.949 + }, + { + "text": "objectes", + "start": 100.2, + "end": 100.4, + "confidence": 0.805 + } + ] + }, + { + "id": 28, + "seek": 8298, + "start": 100.4, + "end": 105.14, + "text": " techniques et de leur infération dans le vie pour déterminer si ce rapport est totalement", + "tokens": [ + 7512, + 1030, + 368, + 9580, + 1536, + 526, + 2405, + 2680, + 476, + 4941, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203 
+ ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.665, + "words": [ + { + "text": "techniques", + "start": 100.4, + "end": 101.02, + "confidence": 0.954 + }, + { + "text": "et", + "start": 101.02, + "end": 101.52, + "confidence": 0.967 + }, + { + "text": "de", + "start": 101.52, + "end": 101.64, + "confidence": 0.973 + }, + { + "text": "leur", + "start": 101.64, + "end": 101.78, + "confidence": 0.811 + }, + { + "text": "infération", + "start": 101.78, + "end": 102.22, + "confidence": 0.227 + }, + { + "text": "dans", + "start": 102.22, + "end": 102.46, + "confidence": 0.518 + }, + { + "text": "le", + "start": 102.46, + "end": 102.54, + "confidence": 0.511 + }, + { + "text": "vie", + "start": 102.54, + "end": 102.78, + "confidence": 0.612 + }, + { + "text": "pour", + "start": 102.78, + "end": 103.06, + "confidence": 0.933 + }, + { + "text": "déterminer", + "start": 103.06, + "end": 103.64, + "confidence": 0.954 + }, + { + "text": "si", + "start": 103.64, + "end": 103.74, + "confidence": 0.488 + }, + { + "text": "ce", + "start": 103.74, + "end": 103.86, + "confidence": 0.98 + }, + { + "text": "rapport", + "start": 103.86, + "end": 104.1, + "confidence": 0.997 + }, + { + "text": "est", + "start": 104.1, + "end": 104.88, + "confidence": 0.942 + }, + { + "text": "totalement", + "start": 104.88, + "end": 105.14, + "confidence": 0.854 + } + ] + }, + { + "id": 29, + "seek": 8298, + "start": 105.26, + "end": 105.78, + "text": " inédit.", + "tokens": [ + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.943, + "words": [ + { + "text": "inédit.", + "start": 105.26, + "end": 105.78, + "confidence": 0.943 + } + ] + }, + { + "id": 30, + "seek": 8298, + "start": 106.14, + "end": 109.36, + "text": " Mais j'ai l'impression 
comme ça que Nicolas se trompe pas vraiment.", + "tokens": [ + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.798, + "words": [ + { + "text": "Mais", + "start": 106.14, + "end": 106.34, + "confidence": 0.872 + }, + { + "text": "j'ai", + "start": 106.34, + "end": 106.9, + "confidence": 0.924 + }, + { + "text": "l'impression", + "start": 106.9, + "end": 107.28, + "confidence": 0.967 + }, + { + "text": "comme", + "start": 107.28, + "end": 107.5, + "confidence": 0.709 + }, + { + "text": "ça", + "start": 107.5, + "end": 107.68, + "confidence": 0.955 + }, + { + "text": "que", + "start": 107.68, + "end": 107.96, + "confidence": 0.91 + }, + { + "text": "Nicolas", + "start": 107.96, + "end": 108.36, + "confidence": 0.986 + }, + { + "text": "se", + "start": 108.36, + "end": 108.66, + "confidence": 0.569 + }, + { + "text": "trompe", + "start": 108.66, + "end": 109.0, + "confidence": 0.532 + }, + { + "text": "pas", + "start": 109.0, + "end": 109.1, + "confidence": 0.715 + }, + { + "text": "vraiment.", + "start": 109.1, + "end": 109.36, + "confidence": 0.923 + } + ] + }, + { + "id": 31, + "seek": 10970, + "start": 109.72, + "end": 114.26, + "text": " Pour autant, je sache. 
Il y a eu plein de discussions autour de la voiture ou même", + "tokens": [ + 8732, + 34081, + 11, + 1506, + 262, + 6000, + 13, + 4416, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.829, + "words": [ + { + "text": "Pour", + "start": 109.72, + "end": 110.08, + "confidence": 0.92 + }, + { + "text": "autant,", + "start": 110.08, + "end": 110.36, + "confidence": 0.986 + }, + { + "text": "je", + "start": 110.36, + "end": 110.4, + "confidence": 0.894 + }, + { + "text": "sache.", + "start": 110.4, + "end": 110.86, + "confidence": 0.531 + }, + { + "text": "Il", + "start": 110.86, + "end": 111.16, + "confidence": 0.851 + }, + { + "text": "y", + "start": 111.16, + "end": 111.22, + "confidence": 0.872 + }, + { + "text": "a", + "start": 111.22, + "end": 111.28, + "confidence": 0.939 + }, + { + "text": "eu", + "start": 111.28, + "end": 111.38, + "confidence": 0.891 + }, + { + "text": "plein", + "start": 111.38, + "end": 111.76, + "confidence": 0.833 + }, + { + "text": "de", + "start": 111.76, + "end": 112.04, + "confidence": 0.94 + }, + { + "text": "discussions", + "start": 112.04, + "end": 112.46, + "confidence": 0.711 + }, + { + "text": "autour", + "start": 112.46, + "end": 112.96, + "confidence": 0.97 + }, + { + "text": "de", + "start": 112.96, + "end": 113.46, + "confidence": 0.952 + }, + { + "text": "la", + "start": 113.46, + "end": 113.5, + "confidence": 0.955 + }, + { + "text": "voiture", + "start": 113.5, + "end": 113.8, + "confidence": 0.983 + }, + { + "text": "ou", + "start": 113.8, + "end": 114.02, + "confidence": 0.523 + }, + { + "text": "même", + "start": 114.02, + "end": 114.26, + "confidence": 0.963 + } + ] + }, + { + "id": 32, + "seek": 10970, + "start": 114.42, + "end": 118.76, + "text": " du téléphone. 
Mais la dépense n'était pas du même mort, donc le rejet n'en", + "tokens": [ + 1581, + 47159, + 13, + 6313, + 635, + 27998, + 1288, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 11, + 5926, + 476, + 319, + 7108, + 297, + 6, + 268 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.732, + "words": [ + { + "text": "du", + "start": 114.42, + "end": 114.6, + "confidence": 0.677 + }, + { + "text": "téléphone.", + "start": 114.6, + "end": 115.16, + "confidence": 0.983 + }, + { + "text": "Mais", + "start": 115.16, + "end": 115.72, + "confidence": 0.649 + }, + { + "text": "la", + "start": 115.72, + "end": 116.0, + "confidence": 0.809 + }, + { + "text": "dépense", + "start": 116.0, + "end": 116.38, + "confidence": 0.766 + }, + { + "text": "n'était", + "start": 116.38, + "end": 116.62, + "confidence": 0.952 + }, + { + "text": "pas", + "start": 116.62, + "end": 117.02, + "confidence": 0.994 + }, + { + "text": "du", + "start": 117.02, + "end": 117.16, + "confidence": 0.98 + }, + { + "text": "même", + "start": 117.16, + "end": 117.32, + "confidence": 0.944 + }, + { + "text": "mort,", + "start": 117.32, + "end": 117.66, + "confidence": 0.519 + }, + { + "text": "donc", + "start": 117.66, + "end": 117.78, + "confidence": 0.871 + }, + { + "text": "le", + "start": 117.78, + "end": 118.32, + "confidence": 0.932 + }, + { + "text": "rejet", + "start": 118.32, + "end": 118.62, + "confidence": 0.524 + }, + { + "text": "n'en", + "start": 118.62, + "end": 118.76, + "confidence": 0.472 + } + ] + }, + { + "id": 33, + "seek": 10970, + "start": 118.76, + "end": 119.66, + "text": " plus n'était pas du même mort.", + "tokens": [ + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.853, + "words": 
[ + { + "text": "plus", + "start": 118.76, + "end": 118.9, + "confidence": 0.313 + }, + { + "text": "n'était", + "start": 118.9, + "end": 119.1, + "confidence": 0.973 + }, + { + "text": "pas", + "start": 119.1, + "end": 119.26, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.26, + "end": 119.36, + "confidence": 0.988 + }, + { + "text": "même", + "start": 119.36, + "end": 119.5, + "confidence": 0.999 + }, + { + "text": "mort.", + "start": 119.5, + "end": 119.66, + "confidence": 0.984 + } + ] + }, + { + "id": 34, + "seek": 10970, + "start": 120.06, + "end": 122.94, + "text": " On peut adorer sa bagnure, en avoir besoin pour plein de choses.", + "tokens": [ + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 77, + 540, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.753, + "words": [ + { + "text": "On", + "start": 120.06, + "end": 120.22, + "confidence": 0.969 + }, + { + "text": "peut", + "start": 120.22, + "end": 120.32, + "confidence": 0.988 + }, + { + "text": "adorer", + "start": 120.32, + "end": 120.64, + "confidence": 0.854 + }, + { + "text": "sa", + "start": 120.64, + "end": 120.86, + "confidence": 0.918 + }, + { + "text": "bagnure,", + "start": 120.86, + "end": 121.38, + "confidence": 0.368 + }, + { + "text": "en", + "start": 121.38, + "end": 121.52, + "confidence": 0.56 + }, + { + "text": "avoir", + "start": 121.52, + "end": 121.66, + "confidence": 0.969 + }, + { + "text": "besoin", + "start": 121.66, + "end": 122.08, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 122.08, + "end": 122.34, + "confidence": 0.92 + }, + { + "text": "plein", + "start": 122.34, + "end": 122.64, + "confidence": 0.9 + }, + { + "text": "de", + "start": 122.64, + "end": 122.78, + "confidence": 0.993 + }, + { + "text": "choses.", + "start": 122.78, + "end": 122.94, + 
"confidence": 0.994 + } + ] + }, + { + "id": 35, + "seek": 10970, + "start": 123.36, + "end": 126.38, + "text": " Et là, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 3790, + 3684, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.826, + "words": [ + { + "text": "Et", + "start": 123.36, + "end": 123.48, + "confidence": 0.557 + }, + { + "text": "là,", + "start": 123.48, + "end": 123.66, + "confidence": 0.527 + }, + { + "text": "le", + "start": 123.66, + "end": 124.02, + "confidence": 0.986 + }, + { + "text": "soir,", + "start": 124.02, + "end": 124.5, + "confidence": 0.966 + }, + { + "text": "quand", + "start": 124.5, + "end": 124.9, + "confidence": 0.774 + }, + { + "text": "on", + "start": 124.9, + "end": 125.02, + "confidence": 0.986 + }, + { + "text": "va", + "start": 125.02, + "end": 125.1, + "confidence": 0.968 + }, + { + "text": "se", + "start": 125.1, + "end": 125.2, + "confidence": 0.862 + }, + { + "text": "coucher,", + "start": 125.2, + "end": 125.64, + "confidence": 0.791 + }, + { + "text": "on", + "start": 125.64, + "end": 126.04, + "confidence": 0.974 + }, + { + "text": "la", + "start": 126.04, + "end": 126.12, + "confidence": 0.783 + }, + { + "text": "laisse.", + "start": 126.12, + "end": 126.38, + "confidence": 0.981 + } + ] + }, + { + "id": 36, + "seek": 10970, + "start": 127.06, + "end": 130.0, + "text": " On l'a pas dans la main, quand on est collis, quand on n'en mène pas au chiot.", + "tokens": [ + 1282, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 11, + 6932, + 322, + 871, + 1263, + 271, + 11, + 6932, + 322, + 297, + 6, + 268, + 275, + 18832, + 1736, + 1609, + 417, + 6471, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + 
"no_speech_prob": 9.048193533089943e-06, + "confidence": 0.66, + "words": [ + { + "text": "On", + "start": 127.06, + "end": 127.3, + "confidence": 0.742 + }, + { + "text": "l'a", + "start": 127.3, + "end": 127.46, + "confidence": 0.714 + }, + { + "text": "pas", + "start": 127.46, + "end": 127.64, + "confidence": 0.99 + }, + { + "text": "dans", + "start": 127.64, + "end": 127.82, + "confidence": 0.967 + }, + { + "text": "la", + "start": 127.82, + "end": 127.94, + "confidence": 0.956 + }, + { + "text": "main,", + "start": 127.94, + "end": 128.3, + "confidence": 0.978 + }, + { + "text": "quand", + "start": 128.3, + "end": 128.44, + "confidence": 0.967 + }, + { + "text": "on", + "start": 128.44, + "end": 128.6, + "confidence": 0.993 + }, + { + "text": "est", + "start": 128.6, + "end": 128.66, + "confidence": 0.602 + }, + { + "text": "collis,", + "start": 128.66, + "end": 129.2, + "confidence": 0.346 + }, + { + "text": "quand", + "start": 129.2, + "end": 129.24, + "confidence": 0.524 + }, + { + "text": "on", + "start": 129.24, + "end": 129.32, + "confidence": 0.991 + }, + { + "text": "n'en", + "start": 129.32, + "end": 129.38, + "confidence": 0.537 + }, + { + "text": "mène", + "start": 129.38, + "end": 129.56, + "confidence": 0.434 + }, + { + "text": "pas", + "start": 129.56, + "end": 129.72, + "confidence": 0.998 + }, + { + "text": "au", + "start": 129.72, + "end": 129.82, + "confidence": 0.811 + }, + { + "text": "chiot.", + "start": 129.82, + "end": 130.0, + "confidence": 0.473 + } + ] + }, + { + "id": 37, + "seek": 10970, + "start": 130.88, + "end": 135.1, + "text": " On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une", + "tokens": [ + 1282, + 45913, + 7418, + 1136, + 936, + 15797, + 971, + 1872, + 275, + 423, + 1956, + 2678, + 84, + 494, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 
9.048193533089943e-06, + "confidence": 0.672, + "words": [ + { + "text": "On", + "start": 130.88, + "end": 131.0, + "confidence": 0.991 + }, + { + "text": "pouvait", + "start": 131.0, + "end": 131.24, + "confidence": 0.532 + }, + { + "text": "être", + "start": 131.24, + "end": 131.7, + "confidence": 0.545 + }, + { + "text": "émervé", + "start": 131.7, + "end": 132.2, + "confidence": 0.725 + }, + { + "text": "par", + "start": 132.2, + "end": 132.42, + "confidence": 0.82 + }, + { + "text": "son", + "start": 132.42, + "end": 132.68, + "confidence": 0.448 + }, + { + "text": "mome", + "start": 132.68, + "end": 133.04, + "confidence": 0.261 + }, + { + "text": "qui", + "start": 133.04, + "end": 133.26, + "confidence": 0.901 + }, + { + "text": "occupeait", + "start": 133.26, + "end": 133.72, + "confidence": 0.641 + }, + { + "text": "la", + "start": 133.72, + "end": 133.8, + "confidence": 0.8 + }, + { + "text": "ligne", + "start": 133.8, + "end": 134.0, + "confidence": 0.976 + }, + { + "text": "de", + "start": 134.0, + "end": 134.14, + "confidence": 0.964 + }, + { + "text": "téléphone", + "start": 134.14, + "end": 134.44, + "confidence": 0.977 + }, + { + "text": "pendant", + "start": 134.44, + "end": 134.8, + "confidence": 0.92 + }, + { + "text": "une", + "start": 134.8, + "end": 135.1, + "confidence": 0.838 + } + ] + }, + { + "id": 38, + "seek": 10970, + "start": 135.1, + "end": 136.84, + "text": " heure chaque soir pour discuter avec un copain.", + "tokens": [ + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.922, + "words": [ + { + "text": "heure", + "start": 135.1, + "end": 135.36, + "confidence": 0.701 + }, + { + "text": "chaque", + "start": 135.36, + "end": 135.52, + "confidence": 0.957 + }, + { + "text": "soir", + "start": 135.52, + "end": 135.76, + "confidence": 
0.997 + }, + { + "text": "pour", + "start": 135.76, + "end": 135.94, + "confidence": 0.983 + }, + { + "text": "discuter", + "start": 135.94, + "end": 136.26, + "confidence": 0.882 + }, + { + "text": "avec", + "start": 136.26, + "end": 136.44, + "confidence": 0.993 + }, + { + "text": "un", + "start": 136.44, + "end": 136.6, + "confidence": 0.976 + }, + { + "text": "copain.", + "start": 136.6, + "end": 136.84, + "confidence": 0.948 + } + ] + }, + { + "id": 39, + "seek": 13702, + "start": 137.26, + "end": 141.8, + "text": " Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui", + "tokens": [ + 6313, + 2788, + 408, + 725, + 37227, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 5698, + 275, + 423, + 14023, + 6, + 10556 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.716, + "words": [ + { + "text": "Mais", + "start": 137.26, + "end": 137.52, + "confidence": 0.967 + }, + { + "text": "ça", + "start": 137.52, + "end": 137.6, + "confidence": 0.859 + }, + { + "text": "ne", + "start": 137.6, + "end": 137.68, + "confidence": 0.681 + }, + { + "text": "ressemble", + "start": 137.68, + "end": 138.14, + "confidence": 0.743 + }, + { + "text": "pas", + "start": 138.14, + "end": 138.66, + "confidence": 0.524 + }, + { + "text": "à", + "start": 138.66, + "end": 138.94, + "confidence": 0.978 + }, + { + "text": "ce", + "start": 138.94, + "end": 139.02, + "confidence": 0.605 + }, + { + "text": "qu'on", + "start": 139.02, + "end": 139.16, + "confidence": 0.961 + }, + { + "text": "peut", + "start": 139.16, + "end": 139.3, + "confidence": 0.586 + }, + { + "text": "ressentir", + "start": 139.3, + "end": 140.04, + "confidence": 0.898 + }, + { + "text": "à", + "start": 140.04, + "end": 140.24, + "confidence": 0.352 + }, + { + "text": "voir", + "start": 140.24, + "end": 140.48, + "confidence": 0.918 + }, 
+ { + "text": "même", + "start": 140.48, + "end": 140.86, + "confidence": 0.324 + }, + { + "text": "mome", + "start": 140.86, + "end": 141.18, + "confidence": 0.374 + }, + { + "text": "aujourd'hui", + "start": 141.18, + "end": 141.8, + "confidence": 0.949 + } + ] + }, + { + "id": 40, + "seek": 13702, + "start": 141.92, + "end": 145.76, + "text": " continuuellement avec son smartphone dans la main, comme c'était une sorte de estimateur", + "tokens": [ + 2993, + 31816, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 8017, + 15540 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.584, + "words": [ + { + "text": "continuuellement", + "start": 141.92, + "end": 142.8, + "confidence": 0.327 + }, + { + "text": "avec", + "start": 142.8, + "end": 143.18, + "confidence": 0.903 + }, + { + "text": "son", + "start": 143.18, + "end": 143.36, + "confidence": 0.866 + }, + { + "text": "smartphone", + "start": 143.36, + "end": 143.64, + "confidence": 0.467 + }, + { + "text": "dans", + "start": 143.64, + "end": 143.92, + "confidence": 0.554 + }, + { + "text": "la", + "start": 143.92, + "end": 144.0, + "confidence": 0.972 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.26, + "confidence": 0.997 + }, + { + "text": "comme", + "start": 144.26, + "end": 144.52, + "confidence": 0.95 + }, + { + "text": "c'était", + "start": 144.52, + "end": 144.78, + "confidence": 0.574 + }, + { + "text": "une", + "start": 144.78, + "end": 144.94, + "confidence": 0.971 + }, + { + "text": "sorte", + "start": 144.94, + "end": 145.1, + "confidence": 0.642 + }, + { + "text": "de", + "start": 145.1, + "end": 145.18, + "confidence": 0.268 + }, + { + "text": "estimateur", + "start": 145.18, + "end": 145.76, + "confidence": 0.415 + } + ] + }, + { + "id": 41, + "seek": 13702, + "start": 145.94, + "end": 148.88, + "text": " extère 
de l'intempis de lâcher à l'éantrénée, ça m'a eu immédiate.", + "tokens": [ + 1279, + 4212, + 368, + 287, + 6, + 686, + 15970, + 271, + 368, + 48835, + 6759, + 1531, + 287, + 6, + 526, + 394, + 81, + 3516, + 3856, + 11, + 2788, + 275, + 6, + 64, + 2228, + 3397, + 526, + 4504, + 473, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.401, + "words": [ + { + "text": "extère", + "start": 145.94, + "end": 146.38, + "confidence": 0.357 + }, + { + "text": "de", + "start": 146.38, + "end": 146.46, + "confidence": 0.253 + }, + { + "text": "l'intempis", + "start": 146.46, + "end": 146.7, + "confidence": 0.153 + }, + { + "text": "de", + "start": 146.7, + "end": 146.9, + "confidence": 0.858 + }, + { + "text": "lâcher", + "start": 146.9, + "end": 147.32, + "confidence": 0.824 + }, + { + "text": "à", + "start": 147.32, + "end": 147.44, + "confidence": 0.494 + }, + { + "text": "l'éantrénée,", + "start": 147.44, + "end": 147.94, + "confidence": 0.476 + }, + { + "text": "ça", + "start": 147.94, + "end": 148.0, + "confidence": 0.772 + }, + { + "text": "m'a", + "start": 148.0, + "end": 148.26, + "confidence": 0.532 + }, + { + "text": "eu", + "start": 148.26, + "end": 148.42, + "confidence": 0.181 + }, + { + "text": "immédiate.", + "start": 148.42, + "end": 148.88, + "confidence": 0.537 + } + ] + }, + { + "id": 42, + "seek": 13702, + "start": 149.08, + "end": 152.02, + "text": " Bon, je dis ça pour le mome, mais évidemment, va là pour nos aussi.", + "tokens": [ + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 423, + 11, + 2420, + 24724, + 11, + 2773, + 3684, + 2016, + 3269, + 6212, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.529, + "words": [ + { + "text": "Bon,", + "start": 149.08, + "end": 149.3, + "confidence": 0.285 + 
}, + { + "text": "je", + "start": 149.3, + "end": 149.34, + "confidence": 0.425 + }, + { + "text": "dis", + "start": 149.34, + "end": 149.42, + "confidence": 0.316 + }, + { + "text": "ça", + "start": 149.42, + "end": 149.62, + "confidence": 0.953 + }, + { + "text": "pour", + "start": 149.62, + "end": 149.72, + "confidence": 0.971 + }, + { + "text": "le", + "start": 149.72, + "end": 149.82, + "confidence": 0.993 + }, + { + "text": "mome,", + "start": 149.82, + "end": 150.24, + "confidence": 0.575 + }, + { + "text": "mais", + "start": 150.24, + "end": 150.44, + "confidence": 0.713 + }, + { + "text": "évidemment,", + "start": 150.44, + "end": 151.1, + "confidence": 0.775 + }, + { + "text": "va", + "start": 151.1, + "end": 151.28, + "confidence": 0.381 + }, + { + "text": "là", + "start": 151.28, + "end": 151.42, + "confidence": 0.633 + }, + { + "text": "pour", + "start": 151.42, + "end": 151.62, + "confidence": 0.398 + }, + { + "text": "nos", + "start": 151.62, + "end": 151.7, + "confidence": 0.807 + }, + { + "text": "aussi.", + "start": 151.7, + "end": 152.02, + "confidence": 0.145 + } + ] + }, + { + "id": 43, + "seek": 13702, + "start": 152.66, + "end": 154.4, + "text": " Donc, rapport immédiate d'accord.", + "tokens": [ + 7477, + 11, + 18018, + 3397, + 526, + 4504, + 473, + 274, + 6, + 19947, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.851, + "words": [ + { + "text": "Donc,", + "start": 152.66, + "end": 153.0, + "confidence": 0.985 + }, + { + "text": "rapport", + "start": 153.0, + "end": 153.42, + "confidence": 0.977 + }, + { + "text": "immédiate", + "start": 153.42, + "end": 154.26, + "confidence": 0.825 + }, + { + "text": "d'accord.", + "start": 154.26, + "end": 154.4, + "confidence": 0.806 + } + ] + }, + { + "id": 44, + "seek": 13702, + "start": 154.4, + "end": 157.95, + "text": " Mais pourquoi, à ton impression qu'on en sortira 
jamais?", + "tokens": [ + 6313, + 19934, + 11, + 1531, + 2952, + 9995, + 421, + 6, + 266, + 465, + 26906, + 64, + 14540, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.791, + "words": [ + { + "text": "Mais", + "start": 154.4, + "end": 155.9, + "confidence": 0.984 + }, + { + "text": "pourquoi,", + "start": 155.9, + "end": 156.36, + "confidence": 0.959 + }, + { + "text": "à", + "start": 156.36, + "end": 156.48, + "confidence": 0.821 + }, + { + "text": "ton", + "start": 156.48, + "end": 156.66, + "confidence": 0.952 + }, + { + "text": "impression", + "start": 156.66, + "end": 156.96, + "confidence": 0.932 + }, + { + "text": "qu'on", + "start": 156.96, + "end": 157.28, + "confidence": 0.906 + }, + { + "text": "en", + "start": 157.28, + "end": 157.34, + "confidence": 0.789 + }, + { + "text": "sortira", + "start": 157.34, + "end": 157.84, + "confidence": 0.543 + }, + { + "text": "jamais?", + "start": 157.84, + "end": 157.95, + "confidence": 0.503 + } + ] + }, + { + "id": 45, + "seek": 13702, + "start": 157.95, + "end": 162.37, + "text": " Et puis, il faut en remettre la faute sur les gens qui ont créé cette", + "tokens": [ + 3790, + 9093, + 11, + 1930, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 5550 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.82, + "words": [ + { + "text": "Et", + "start": 157.95, + "end": 159.26, + "confidence": 0.838 + }, + { + "text": "puis,", + "start": 159.26, + "end": 159.58, + "confidence": 0.481 + }, + { + "text": "il", + "start": 159.58, + "end": 159.62, + "confidence": 0.665 + }, + { + "text": "faut", + "start": 159.62, + "end": 159.66, + "confidence": 0.942 + }, + { + "text": "en", + "start": 159.66, + "end": 159.72, + 
"confidence": 0.925 + }, + { + "text": "remettre", + "start": 159.72, + "end": 160.1, + "confidence": 0.985 + }, + { + "text": "la", + "start": 160.1, + "end": 160.28, + "confidence": 0.702 + }, + { + "text": "faute", + "start": 160.28, + "end": 160.62, + "confidence": 0.55 + }, + { + "text": "sur", + "start": 160.62, + "end": 160.9, + "confidence": 0.954 + }, + { + "text": "les", + "start": 160.9, + "end": 161.22, + "confidence": 0.81 + }, + { + "text": "gens", + "start": 161.22, + "end": 161.42, + "confidence": 0.985 + }, + { + "text": "qui", + "start": 161.42, + "end": 161.58, + "confidence": 0.981 + }, + { + "text": "ont", + "start": 161.58, + "end": 161.62, + "confidence": 0.955 + }, + { + "text": "créé", + "start": 161.62, + "end": 162.3, + "confidence": 0.957 + }, + { + "text": "cette", + "start": 162.3, + "end": 162.37, + "confidence": 0.9 + } + ] + }, + { + "id": 46, + "seek": 16228, + "start": 162.37, + "end": 165.3, + "text": " route merveilleux et diabolique, qui a dit à bollique par coeur, merveilleux.", + "tokens": [ + 7955, + 3551, + 303, + 3409, + 2449, + 1030, + 33227, + 401, + 1925, + 11, + 1956, + 257, + 6176, + 1531, + 748, + 285, + 1925, + 971, + 45781, + 11, + 3551, + 303, + 3409, + 2449, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7643054464588994, + "compression_ratio": 1.544041450777202, + "no_speech_prob": 3.89045562769752e-05, + "confidence": 0.397, + "words": [ + { + "text": "route", + "start": 162.37, + "end": 162.6, + "confidence": 0.076 + }, + { + "text": "merveilleux", + "start": 162.6, + "end": 163.3, + "confidence": 0.635 + }, + { + "text": "et", + "start": 163.3, + "end": 163.42, + "confidence": 0.837 + }, + { + "text": "diabolique,", + "start": 163.42, + "end": 163.82, + "confidence": 0.328 + }, + { + "text": "qui", + "start": 163.82, + "end": 163.9, + "confidence": 0.365 + }, + { + "text": "a", + "start": 163.9, + "end": 163.98, + "confidence": 0.054 + }, + { + "text": "dit", + "start": 163.98, + "end": 164.06, + 
"confidence": 0.085 + }, + { + "text": "à", + "start": 164.06, + "end": 164.1, + "confidence": 0.433 + }, + { + "text": "bollique", + "start": 164.1, + "end": 164.3, + "confidence": 0.297 + }, + { + "text": "par", + "start": 164.3, + "end": 164.52, + "confidence": 0.634 + }, + { + "text": "coeur,", + "start": 164.52, + "end": 164.78, + "confidence": 0.361 + }, + { + "text": "merveilleux.", + "start": 164.78, + "end": 165.3, + "confidence": 0.982 + } + ] + }, + { + "id": 47, + "seek": 16228, + "start": 167.36, + "end": 168.7, + "text": " Les économistes parlent de dépendance du santé.", + "tokens": [ + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 30068, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7643054464588994, + "compression_ratio": 1.544041450777202, + "no_speech_prob": 3.89045562769752e-05, + "confidence": 0.748, + "words": [ + { + "text": "Les", + "start": 167.36, + "end": 167.4, + "confidence": 0.513 + }, + { + "text": "économistes", + "start": 167.4, + "end": 167.5, + "confidence": 0.937 + }, + { + "text": "parlent", + "start": 167.5, + "end": 167.78, + "confidence": 0.806 + }, + { + "text": "de", + "start": 167.78, + "end": 167.82, + "confidence": 0.855 + }, + { + "text": "dépendance", + "start": 167.82, + "end": 168.32, + "confidence": 0.758 + }, + { + "text": "du", + "start": 168.32, + "end": 168.5, + "confidence": 0.979 + }, + { + "text": "santé.", + "start": 168.5, + "end": 168.7, + "confidence": 0.389 + } + ] + }, + { + "id": 48, + "seek": 16228, + "start": 168.84, + "end": 172.66, + "text": " Ces vidéos, en fait, on est un santé qui a été établie, c'est un soit mon termine,", + "tokens": [ + 28414, + 25417, + 11, + 465, + 3887, + 11, + 322, + 871, + 517, + 30068, + 1956, + 257, + 8862, + 4823, + 455, + 6302, + 11, + 269, + 6, + 377, + 517, + 12703, + 1108, + 1433, + 533, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.7643054464588994, + "compression_ratio": 1.544041450777202, + "no_speech_prob": 
3.89045562769752e-05, + "confidence": 0.487, + "words": [ + { + "text": "Ces", + "start": 168.84, + "end": 169.14, + "confidence": 0.531 + }, + { + "text": "vidéos,", + "start": 169.14, + "end": 169.5, + "confidence": 0.725 + }, + { + "text": "en", + "start": 169.5, + "end": 169.62, + "confidence": 0.757 + }, + { + "text": "fait,", + "start": 169.62, + "end": 169.68, + "confidence": 0.974 + }, + { + "text": "on", + "start": 169.68, + "end": 169.74, + "confidence": 0.306 + }, + { + "text": "est", + "start": 169.74, + "end": 169.86, + "confidence": 0.82 + }, + { + "text": "un", + "start": 169.86, + "end": 170.16, + "confidence": 0.245 + }, + { + "text": "santé", + "start": 170.16, + "end": 170.72, + "confidence": 0.943 + }, + { + "text": "qui", + "start": 170.72, + "end": 170.88, + "confidence": 0.839 + }, + { + "text": "a", + "start": 170.88, + "end": 170.96, + "confidence": 0.533 + }, + { + "text": "été", + "start": 170.96, + "end": 171.08, + "confidence": 0.992 + }, + { + "text": "établie,", + "start": 171.08, + "end": 171.6, + "confidence": 0.316 + }, + { + "text": "c'est", + "start": 171.6, + "end": 171.74, + "confidence": 0.55 + }, + { + "text": "un", + "start": 171.74, + "end": 171.88, + "confidence": 0.651 + }, + { + "text": "soit", + "start": 171.88, + "end": 172.12, + "confidence": 0.284 + }, + { + "text": "mon", + "start": 172.12, + "end": 172.28, + "confidence": 0.225 + }, + { + "text": "termine,", + "start": 172.28, + "end": 172.66, + "confidence": 0.288 + } + ] + }, + { + "id": 49, + "seek": 17312, + "start": 173.14, + "end": 177.42, + "text": " soit définissant des beurs, on définisse un signalétique.", + "tokens": [ + 50364, + 12703, + 40763, + 29492, + 730, + 312, + 2156, + 11, + 322, + 40763, + 7746, + 517, + 6358, + 42379, + 13, + 51436 + ], + "temperature": 0.0, + "avg_logprob": -0.9428024291992188, + "compression_ratio": 1.0169491525423728, + "no_speech_prob": 6.687085260637105e-05, + "confidence": 0.403, + "words": [ + { + "text": "soit", + 
"start": 173.14, + "end": 174.04, + "confidence": 0.127 + }, + { + "text": "définissant", + "start": 174.04, + "end": 175.5, + "confidence": 0.58 + }, + { + "text": "des", + "start": 175.5, + "end": 175.68, + "confidence": 0.813 + }, + { + "text": "beurs,", + "start": 175.68, + "end": 175.96, + "confidence": 0.378 + }, + { + "text": "on", + "start": 175.96, + "end": 176.04, + "confidence": 0.165 + }, + { + "text": "définisse", + "start": 176.04, + "end": 176.42, + "confidence": 0.536 + }, + { + "text": "un", + "start": 176.42, + "end": 176.6, + "confidence": 0.266 + }, + { + "text": "signalétique.", + "start": 176.6, + "end": 177.42, + "confidence": 0.544 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/accurate_japanese.mp3.words.json b/tests/expected/tiny_auto/accurate_japanese.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..a23f1c7e1e1325694aee84105506edfd0b833822 --- /dev/null +++ b/tests/expected/tiny_auto/accurate_japanese.mp3.words.json @@ -0,0 +1,1766 @@ +{ + "text": "いきます ニュースタブでのサイトメイション機が 実際と違う検に関するご質問いただいております同じ度メインでデレクトリーごとに 別再度として管理上をしているサイトバリマスのサプテレクトリーごとにわけたサイトは それぞれ パブリシャーセンターに登録していくぐるニュース上ではサイトとして認識され パブリコンアイコンはサイトボトフのものが 正しく表示されますしかし ルグル検査結果のニュースタブでは パブリコンサイトメイショー機ともに 正しくない医療時にいいなりますこちらいたいです ものではありません パブリコンはサブテレクトリーごとに設定した", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.08, + "end": 6.62, + "text": "いきます ニュースタブでのサイトメイション機が 実際と違う検に関するご質問いただいております", + "tokens": [ + 50364, + 47348, + 15096, + 233, + 26167, + 3384, + 9550, + 12144, + 28889, + 2474, + 2972, + 23607, + 8040, + 7588, + 21647, + 8040, + 43891, + 4824, + 17543, + 5142, + 220, + 33197, + 34837, + 3193, + 49806, + 38739, + 250, + 4108, + 5196, + 95, + 22570, + 9991, + 43450, + 11361, + 32418, + 18549, + 6117, + 19420, + 50712 + ], + "temperature": 0.0, + "avg_logprob": -0.44024292115242253, + "compression_ratio": 1.6122448979591837, + "no_speech_prob": 0.18976172804832458, + "confidence": 
0.549, + "words": [ + { + "text": "いきます", + "start": 0.08, + "end": 0.6, + "confidence": 0.173 + }, + { + "text": " ニ", + "start": 0.6, + "end": 1.32, + "confidence": 0.187 + }, + { + "text": "ュ", + "start": 1.32, + "end": 1.38, + "confidence": 0.974 + }, + { + "text": "ー", + "start": 1.38, + "end": 1.5, + "confidence": 0.945 + }, + { + "text": "ス", + "start": 1.5, + "end": 1.52, + "confidence": 0.961 + }, + { + "text": "タ", + "start": 1.52, + "end": 1.68, + "confidence": 0.87 + }, + { + "text": "ブ", + "start": 1.68, + "end": 1.8, + "confidence": 0.772 + }, + { + "text": "で", + "start": 1.8, + "end": 2.02, + "confidence": 0.948 + }, + { + "text": "の", + "start": 2.02, + "end": 2.26, + "confidence": 0.838 + }, + { + "text": "サ", + "start": 2.26, + "end": 2.56, + "confidence": 0.015 + }, + { + "text": "イ", + "start": 2.56, + "end": 2.66, + "confidence": 0.983 + }, + { + "text": "ト", + "start": 2.66, + "end": 2.76, + "confidence": 0.992 + }, + { + "text": "メ", + "start": 2.76, + "end": 2.94, + "confidence": 0.386 + }, + { + "text": "イ", + "start": 2.94, + "end": 3.0, + "confidence": 0.234 + }, + { + "text": "ショ", + "start": 3.0, + "end": 3.14, + "confidence": 0.894 + }, + { + "text": "ン", + "start": 3.14, + "end": 3.2, + "confidence": 0.983 + }, + { + "text": "機", + "start": 3.2, + "end": 3.32, + "confidence": 0.363 + }, + { + "text": "が", + "start": 3.32, + "end": 3.5, + "confidence": 0.64 + }, + { + "text": " ", + "start": 3.5, + "end": 3.6, + "confidence": 0.232 + }, + { + "text": "実", + "start": 3.6, + "end": 3.76, + "confidence": 0.751 + }, + { + "text": "際", + "start": 3.76, + "end": 3.9, + "confidence": 0.98 + }, + { + "text": "と", + "start": 3.9, + "end": 4.08, + "confidence": 0.928 + }, + { + "text": "違う", + "start": 4.08, + "end": 4.38, + "confidence": 0.898 + }, + { + "text": "検", + "start": 4.38, + "end": 4.64, + "confidence": 0.327 + }, + { + "text": "に", + "start": 4.64, + "end": 4.74, + "confidence": 0.187 + }, + { + "text": "関", + "start": 4.74, + 
"end": 4.94, + "confidence": 0.497 + }, + { + "text": "する", + "start": 4.94, + "end": 5.12, + "confidence": 0.988 + }, + { + "text": "ご", + "start": 5.12, + "end": 5.3, + "confidence": 0.649 + }, + { + "text": "質", + "start": 5.3, + "end": 5.46, + "confidence": 0.894 + }, + { + "text": "問", + "start": 5.46, + "end": 5.62, + "confidence": 0.992 + }, + { + "text": "いただ", + "start": 5.62, + "end": 5.92, + "confidence": 0.936 + }, + { + "text": "いて", + "start": 5.92, + "end": 6.14, + "confidence": 0.981 + }, + { + "text": "お", + "start": 6.14, + "end": 6.26, + "confidence": 0.817 + }, + { + "text": "ります", + "start": 6.26, + "end": 6.62, + "confidence": 0.971 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.94, + "end": 13.58, + "text": "同じ度メインでデレクトリーごとに 別再度として管理上をしているサイトバリマスの", + "tokens": [ + 50712, + 13089, + 9257, + 13127, + 21647, + 8040, + 4824, + 2474, + 31327, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 220, + 16158, + 8623, + 13127, + 3193, + 8822, + 23131, + 13876, + 5708, + 5998, + 8822, + 22979, + 23607, + 8040, + 7588, + 18593, + 12376, + 13258, + 9550, + 2972, + 51044 + ], + "temperature": 0.0, + "avg_logprob": -0.44024292115242253, + "compression_ratio": 1.6122448979591837, + "no_speech_prob": 0.18976172804832458, + "confidence": 0.525, + "words": [ + { + "text": "同", + "start": 6.94, + "end": 7.28, + "confidence": 0.469 + }, + { + "text": "じ", + "start": 7.28, + "end": 7.44, + "confidence": 0.218 + }, + { + "text": "度", + "start": 7.44, + "end": 7.62, + "confidence": 0.139 + }, + { + "text": "メ", + "start": 7.62, + "end": 7.74, + "confidence": 0.209 + }, + { + "text": "イ", + "start": 7.74, + "end": 7.82, + "confidence": 0.904 + }, + { + "text": "ン", + "start": 7.82, + "end": 7.84, + "confidence": 0.911 + }, + { + "text": "で", + "start": 7.84, + "end": 8.0, + "confidence": 0.979 + }, + { + "text": "デ", + "start": 8.0, + "end": 8.14, + "confidence": 0.37 + }, + { + "text": "レ", + "start": 8.14, + "end": 8.28, + "confidence": 
0.674 + }, + { + "text": "ク", + "start": 8.28, + "end": 8.36, + "confidence": 0.974 + }, + { + "text": "ト", + "start": 8.36, + "end": 8.5, + "confidence": 0.979 + }, + { + "text": "リ", + "start": 8.5, + "end": 8.62, + "confidence": 0.96 + }, + { + "text": "ー", + "start": 8.62, + "end": 8.72, + "confidence": 0.701 + }, + { + "text": "ご", + "start": 8.72, + "end": 8.86, + "confidence": 0.267 + }, + { + "text": "と", + "start": 8.86, + "end": 9.0, + "confidence": 0.993 + }, + { + "text": "に", + "start": 9.0, + "end": 9.14, + "confidence": 0.967 + }, + { + "text": " ", + "start": 9.14, + "end": 9.34, + "confidence": 0.12 + }, + { + "text": "別", + "start": 9.34, + "end": 9.4, + "confidence": 0.527 + }, + { + "text": "再", + "start": 9.4, + "end": 9.58, + "confidence": 0.342 + }, + { + "text": "度", + "start": 9.58, + "end": 9.74, + "confidence": 0.323 + }, + { + "text": "と", + "start": 9.74, + "end": 9.92, + "confidence": 0.891 + }, + { + "text": "して", + "start": 9.92, + "end": 10.4, + "confidence": 0.998 + }, + { + "text": "管", + "start": 10.4, + "end": 10.86, + "confidence": 0.418 + }, + { + "text": "理", + "start": 10.86, + "end": 11.08, + "confidence": 1.0 + }, + { + "text": "上", + "start": 11.08, + "end": 11.36, + "confidence": 0.472 + }, + { + "text": "を", + "start": 11.36, + "end": 11.56, + "confidence": 0.987 + }, + { + "text": "して", + "start": 11.56, + "end": 11.84, + "confidence": 0.931 + }, + { + "text": "いる", + "start": 11.84, + "end": 12.16, + "confidence": 0.939 + }, + { + "text": "サ", + "start": 12.16, + "end": 12.52, + "confidence": 0.069 + }, + { + "text": "イ", + "start": 12.52, + "end": 12.58, + "confidence": 0.985 + }, + { + "text": "ト", + "start": 12.58, + "end": 12.7, + "confidence": 0.976 + }, + { + "text": "バ", + "start": 12.7, + "end": 12.82, + "confidence": 0.297 + }, + { + "text": "リ", + "start": 12.82, + "end": 12.96, + "confidence": 0.362 + }, + { + "text": "マ", + "start": 12.96, + "end": 13.12, + "confidence": 0.19 + }, + { + "text": "ス", + 
"start": 13.12, + "end": 13.22, + "confidence": 0.859 + }, + { + "text": "の", + "start": 13.22, + "end": 13.58, + "confidence": 0.7 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 13.68, + "end": 20.74, + "text": "サプテレクトリーごとにわけたサイトは それぞれ パブリシャーセンターに登録していくぐるニュース上では", + "tokens": [ + 51044, + 23607, + 20953, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 9206, + 7625, + 3368, + 23607, + 8040, + 7588, + 3065, + 47765, + 31563, + 4132, + 15096, + 239, + 28889, + 12376, + 11054, + 17233, + 3384, + 31223, + 4824, + 30736, + 4108, + 46246, + 8822, + 49394, + 35849, + 4895, + 34737, + 26167, + 3384, + 9550, + 5708, + 16719, + 51408 + ], + "temperature": 0.0, + "avg_logprob": -0.44024292115242253, + "compression_ratio": 1.6122448979591837, + "no_speech_prob": 0.18976172804832458, + "confidence": 0.622, + "words": [ + { + "text": "サ", + "start": 13.68, + "end": 14.22, + "confidence": 0.398 + }, + { + "text": "プ", + "start": 14.22, + "end": 14.32, + "confidence": 0.533 + }, + { + "text": "テ", + "start": 14.32, + "end": 14.42, + "confidence": 0.637 + }, + { + "text": "レ", + "start": 14.42, + "end": 14.58, + "confidence": 0.825 + }, + { + "text": "ク", + "start": 14.58, + "end": 14.66, + "confidence": 0.474 + }, + { + "text": "ト", + "start": 14.66, + "end": 14.78, + "confidence": 0.973 + }, + { + "text": "リ", + "start": 14.78, + "end": 14.92, + "confidence": 0.873 + }, + { + "text": "ー", + "start": 14.92, + "end": 15.04, + "confidence": 0.917 + }, + { + "text": "ご", + "start": 15.04, + "end": 15.1, + "confidence": 0.016 + }, + { + "text": "と", + "start": 15.1, + "end": 15.38, + "confidence": 0.991 + }, + { + "text": "に", + "start": 15.38, + "end": 15.86, + "confidence": 0.493 + }, + { + "text": "わ", + "start": 15.86, + "end": 16.02, + "confidence": 0.756 + }, + { + "text": "け", + "start": 16.02, + "end": 16.14, + "confidence": 0.988 + }, + { + "text": "た", + "start": 16.14, + "end": 16.28, + "confidence": 0.951 + }, + { + "text": "サ", + 
"start": 16.28, + "end": 16.42, + "confidence": 0.802 + }, + { + "text": "イ", + "start": 16.42, + "end": 16.5, + "confidence": 0.981 + }, + { + "text": "ト", + "start": 16.5, + "end": 16.6, + "confidence": 0.904 + }, + { + "text": "は", + "start": 16.6, + "end": 17.08, + "confidence": 0.976 + }, + { + "text": " それ", + "start": 17.08, + "end": 17.44, + "confidence": 0.128 + }, + { + "text": "ぞ", + "start": 17.44, + "end": 17.6, + "confidence": 0.85 + }, + { + "text": "れ", + "start": 17.6, + "end": 17.72, + "confidence": 0.995 + }, + { + "text": " パ", + "start": 17.72, + "end": 17.86, + "confidence": 0.178 + }, + { + "text": "ブ", + "start": 17.86, + "end": 17.94, + "confidence": 0.941 + }, + { + "text": "リ", + "start": 17.94, + "end": 18.04, + "confidence": 0.991 + }, + { + "text": "シ", + "start": 18.04, + "end": 18.18, + "confidence": 0.755 + }, + { + "text": "ャ", + "start": 18.18, + "end": 18.26, + "confidence": 0.825 + }, + { + "text": "ー", + "start": 18.26, + "end": 18.42, + "confidence": 0.765 + }, + { + "text": "セ", + "start": 18.42, + "end": 18.44, + "confidence": 0.588 + }, + { + "text": "ン", + "start": 18.44, + "end": 18.52, + "confidence": 0.832 + }, + { + "text": "ター", + "start": 18.52, + "end": 18.6, + "confidence": 0.998 + }, + { + "text": "に", + "start": 18.6, + "end": 18.76, + "confidence": 0.858 + }, + { + "text": "登録", + "start": 18.76, + "end": 19.12, + "confidence": 0.902 + }, + { + "text": "して", + "start": 19.12, + "end": 19.38, + "confidence": 0.766 + }, + { + "text": "いく", + "start": 19.38, + "end": 19.62, + "confidence": 0.877 + }, + { + "text": "ぐ", + "start": 19.62, + "end": 19.78, + "confidence": 0.221 + }, + { + "text": "る", + "start": 19.78, + "end": 19.9, + "confidence": 0.85 + }, + { + "text": "ニ", + "start": 19.9, + "end": 20.02, + "confidence": 0.68 + }, + { + "text": "ュ", + "start": 20.02, + "end": 20.14, + "confidence": 0.996 + }, + { + "text": "ー", + "start": 20.14, + "end": 20.16, + "confidence": 0.989 + }, + { + "text": "ス", + 
"start": 20.16, + "end": 20.22, + "confidence": 0.982 + }, + { + "text": "上", + "start": 20.22, + "end": 20.4, + "confidence": 0.236 + }, + { + "text": "では", + "start": 20.4, + "end": 20.74, + "confidence": 0.763 + } + ] + }, + { + "id": 3, + "seek": 2088, + "start": 20.9, + "end": 27.0, + "text": "サイトとして認識され パブリコンアイコンはサイトボトフのものが 正しく表示されます", + "tokens": [ + 50364, + 23607, + 8040, + 7588, + 3193, + 8822, + 22041, + 43143, + 6722, + 4132, + 15096, + 239, + 28889, + 12376, + 18066, + 4824, + 12817, + 8040, + 18066, + 4824, + 3065, + 23607, + 8040, + 7588, + 37626, + 7588, + 17320, + 2972, + 44726, + 5142, + 220, + 15789, + 26568, + 40053, + 6722, + 4132, + 5368, + 50684 + ], + "temperature": 0.0, + "avg_logprob": -0.35092457005235017, + "compression_ratio": 1.6926406926406927, + "no_speech_prob": 0.06554599106311798, + "confidence": 0.389, + "words": [ + { + "text": "サ", + "start": 20.9, + "end": 21.26, + "confidence": 0.002 + }, + { + "text": "イ", + "start": 21.26, + "end": 21.34, + "confidence": 0.939 + }, + { + "text": "ト", + "start": 21.34, + "end": 21.44, + "confidence": 0.379 + }, + { + "text": "と", + "start": 21.44, + "end": 21.58, + "confidence": 0.832 + }, + { + "text": "して", + "start": 21.58, + "end": 21.8, + "confidence": 0.981 + }, + { + "text": "認", + "start": 21.8, + "end": 22.08, + "confidence": 0.179 + }, + { + "text": "識", + "start": 22.08, + "end": 22.2, + "confidence": 0.986 + }, + { + "text": "さ", + "start": 22.2, + "end": 22.34, + "confidence": 0.982 + }, + { + "text": "れ", + "start": 22.34, + "end": 22.64, + "confidence": 0.948 + }, + { + "text": " パ", + "start": 22.64, + "end": 22.86, + "confidence": 0.141 + }, + { + "text": "ブ", + "start": 22.86, + "end": 23.0, + "confidence": 0.051 + }, + { + "text": "リ", + "start": 23.0, + "end": 23.18, + "confidence": 0.007 + }, + { + "text": "コ", + "start": 23.18, + "end": 23.22, + "confidence": 0.817 + }, + { + "text": "ン", + "start": 23.22, + "end": 23.3, + "confidence": 0.887 + }, + { + "text": "ア", + 
"start": 23.3, + "end": 23.46, + "confidence": 0.107 + }, + { + "text": "イ", + "start": 23.46, + "end": 23.52, + "confidence": 0.957 + }, + { + "text": "コ", + "start": 23.52, + "end": 23.64, + "confidence": 0.309 + }, + { + "text": "ン", + "start": 23.64, + "end": 23.86, + "confidence": 0.893 + }, + { + "text": "は", + "start": 23.86, + "end": 23.88, + "confidence": 0.834 + }, + { + "text": "サ", + "start": 23.88, + "end": 24.04, + "confidence": 0.642 + }, + { + "text": "イ", + "start": 24.04, + "end": 24.16, + "confidence": 0.997 + }, + { + "text": "ト", + "start": 24.16, + "end": 24.24, + "confidence": 0.989 + }, + { + "text": "ボ", + "start": 24.24, + "end": 24.38, + "confidence": 0.587 + }, + { + "text": "ト", + "start": 24.38, + "end": 24.5, + "confidence": 0.861 + }, + { + "text": "フ", + "start": 24.5, + "end": 24.66, + "confidence": 0.037 + }, + { + "text": "の", + "start": 24.66, + "end": 24.72, + "confidence": 0.659 + }, + { + "text": "もの", + "start": 24.72, + "end": 24.9, + "confidence": 0.502 + }, + { + "text": "が", + "start": 24.9, + "end": 25.08, + "confidence": 0.339 + }, + { + "text": " ", + "start": 25.08, + "end": 25.32, + "confidence": 0.283 + }, + { + "text": "正", + "start": 25.32, + "end": 25.4, + "confidence": 0.631 + }, + { + "text": "しく", + "start": 25.4, + "end": 25.64, + "confidence": 0.979 + }, + { + "text": "表示", + "start": 25.64, + "end": 26.0, + "confidence": 0.665 + }, + { + "text": "さ", + "start": 26.0, + "end": 26.28, + "confidence": 0.993 + }, + { + "text": "れ", + "start": 26.28, + "end": 26.6, + "confidence": 0.986 + }, + { + "text": "ます", + "start": 26.6, + "end": 27.0, + "confidence": 0.888 + } + ] + }, + { + "id": 4, + "seek": 2088, + "start": 27.28, + "end": 33.8, + "text": "しかし ルグル検査結果のニュースタブでは パブリコンサイトメイショー機ともに 正しくない医療時にいいなります", + "tokens": [ + 50684, + 32156, + 2849, + 220, + 9405, + 23839, + 9405, + 38739, + 250, + 17238, + 119, + 35181, + 2972, + 34737, + 26167, + 3384, + 9550, + 12144, + 28889, + 16719, + 15096, + 239, + 28889, + 
12376, + 18066, + 4824, + 23607, + 8040, + 7588, + 21647, + 8040, + 43891, + 3384, + 17543, + 3193, + 4801, + 4108, + 220, + 15789, + 26568, + 9311, + 9937, + 119, + 6651, + 224, + 6611, + 4108, + 13806, + 3203, + 19420, + 51024 + ], + "temperature": 0.0, + "avg_logprob": -0.35092457005235017, + "compression_ratio": 1.6926406926406927, + "no_speech_prob": 0.06554599106311798, + "confidence": 0.502, + "words": [ + { + "text": "しか", + "start": 27.28, + "end": 27.58, + "confidence": 0.909 + }, + { + "text": "し", + "start": 27.58, + "end": 27.72, + "confidence": 0.989 + }, + { + "text": " ", + "start": 27.72, + "end": 27.86, + "confidence": 0.065 + }, + { + "text": "ル", + "start": 27.86, + "end": 27.9, + "confidence": 0.236 + }, + { + "text": "グ", + "start": 27.9, + "end": 28.0, + "confidence": 0.538 + }, + { + "text": "ル", + "start": 28.0, + "end": 28.14, + "confidence": 0.989 + }, + { + "text": "検", + "start": 28.14, + "end": 28.32, + "confidence": 0.565 + }, + { + "text": "査", + "start": 28.32, + "end": 28.46, + "confidence": 0.728 + }, + { + "text": "結果", + "start": 28.46, + "end": 28.72, + "confidence": 0.977 + }, + { + "text": "の", + "start": 28.72, + "end": 28.92, + "confidence": 0.96 + }, + { + "text": "ニ", + "start": 28.92, + "end": 29.06, + "confidence": 0.777 + }, + { + "text": "ュ", + "start": 29.06, + "end": 29.14, + "confidence": 0.995 + }, + { + "text": "ー", + "start": 29.14, + "end": 29.2, + "confidence": 0.98 + }, + { + "text": "ス", + "start": 29.2, + "end": 29.26, + "confidence": 0.971 + }, + { + "text": "タ", + "start": 29.26, + "end": 29.34, + "confidence": 0.806 + }, + { + "text": "ブ", + "start": 29.34, + "end": 29.44, + "confidence": 0.685 + }, + { + "text": "では", + "start": 29.44, + "end": 29.68, + "confidence": 0.942 + }, + { + "text": " パ", + "start": 29.68, + "end": 30.02, + "confidence": 0.123 + }, + { + "text": "ブ", + "start": 30.02, + "end": 30.14, + "confidence": 0.041 + }, + { + "text": "リ", + "start": 30.14, + "end": 30.32, + "confidence": 
0.021 + }, + { + "text": "コ", + "start": 30.32, + "end": 30.34, + "confidence": 0.89 + }, + { + "text": "ン", + "start": 30.34, + "end": 30.42, + "confidence": 0.918 + }, + { + "text": "サ", + "start": 30.42, + "end": 30.54, + "confidence": 0.426 + }, + { + "text": "イ", + "start": 30.54, + "end": 30.62, + "confidence": 0.979 + }, + { + "text": "ト", + "start": 30.62, + "end": 30.72, + "confidence": 0.956 + }, + { + "text": "メ", + "start": 30.72, + "end": 30.86, + "confidence": 0.251 + }, + { + "text": "イ", + "start": 30.86, + "end": 30.92, + "confidence": 0.622 + }, + { + "text": "ショ", + "start": 30.92, + "end": 31.02, + "confidence": 0.416 + }, + { + "text": "ー", + "start": 31.02, + "end": 31.08, + "confidence": 0.617 + }, + { + "text": "機", + "start": 31.08, + "end": 31.24, + "confidence": 0.126 + }, + { + "text": "と", + "start": 31.24, + "end": 31.42, + "confidence": 0.357 + }, + { + "text": "も", + "start": 31.42, + "end": 31.54, + "confidence": 0.81 + }, + { + "text": "に", + "start": 31.54, + "end": 31.66, + "confidence": 0.919 + }, + { + "text": " ", + "start": 31.66, + "end": 31.8, + "confidence": 0.402 + }, + { + "text": "正", + "start": 31.8, + "end": 31.96, + "confidence": 0.437 + }, + { + "text": "しく", + "start": 31.96, + "end": 32.18, + "confidence": 0.998 + }, + { + "text": "ない", + "start": 32.18, + "end": 32.4, + "confidence": 0.981 + }, + { + "text": "医", + "start": 32.4, + "end": 32.6, + "confidence": 0.234 + }, + { + "text": "療", + "start": 32.6, + "end": 32.76, + "confidence": 0.686 + }, + { + "text": "時", + "start": 32.76, + "end": 32.96, + "confidence": 0.452 + }, + { + "text": "に", + "start": 32.96, + "end": 33.12, + "confidence": 0.925 + }, + { + "text": "いい", + "start": 33.12, + "end": 33.36, + "confidence": 0.577 + }, + { + "text": "な", + "start": 33.36, + "end": 33.48, + "confidence": 0.807 + }, + { + "text": "ります", + "start": 33.48, + "end": 33.8, + "confidence": 0.941 + } + ] + }, + { + "id": 5, + "seek": 2088, + "start": 34.04, + "end": 
40.36, + "text": "こちらいたいです ものではありません パブリコンはサブテレクトリーごとに設定した", + "tokens": [ + 51024, + 34395, + 17679, + 1764, + 4767, + 32505, + 2972, + 16719, + 14498, + 30250, + 15096, + 239, + 28889, + 12376, + 18066, + 4824, + 3065, + 23607, + 28889, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 39035, + 12088, + 8533, + 51388 + ], + "temperature": 0.0, + "avg_logprob": -0.35092457005235017, + "compression_ratio": 1.6926406926406927, + "no_speech_prob": 0.06554599106311798, + "confidence": 0.364, + "words": [ + { + "text": "こちら", + "start": 34.04, + "end": 34.54, + "confidence": 0.408 + }, + { + "text": "いた", + "start": 34.54, + "end": 34.82, + "confidence": 0.255 + }, + { + "text": "い", + "start": 34.82, + "end": 34.98, + "confidence": 0.11 + }, + { + "text": "です", + "start": 34.98, + "end": 35.04, + "confidence": 0.021 + }, + { + "text": " も", + "start": 35.04, + "end": 35.18, + "confidence": 0.009 + }, + { + "text": "の", + "start": 35.18, + "end": 35.28, + "confidence": 0.812 + }, + { + "text": "では", + "start": 35.28, + "end": 35.5, + "confidence": 0.995 + }, + { + "text": "あり", + "start": 35.5, + "end": 35.72, + "confidence": 0.981 + }, + { + "text": "ません", + "start": 35.72, + "end": 36.18, + "confidence": 0.999 + }, + { + "text": " パ", + "start": 36.18, + "end": 36.9, + "confidence": 0.273 + }, + { + "text": "ブ", + "start": 36.9, + "end": 37.02, + "confidence": 0.011 + }, + { + "text": "リ", + "start": 37.02, + "end": 37.1, + "confidence": 0.237 + }, + { + "text": "コ", + "start": 37.1, + "end": 37.18, + "confidence": 0.951 + }, + { + "text": "ン", + "start": 37.18, + "end": 37.84, + "confidence": 0.832 + }, + { + "text": "は", + "start": 37.84, + "end": 37.86, + "confidence": 0.95 + }, + { + "text": "サ", + "start": 37.86, + "end": 38.22, + "confidence": 0.307 + }, + { + "text": "ブ", + "start": 38.22, + "end": 38.32, + "confidence": 0.98 + }, + { + "text": "テ", + "start": 38.32, + "end": 38.42, + "confidence": 0.317 + }, + { + "text": "レ", + 
"start": 38.42, + "end": 38.56, + "confidence": 0.394 + }, + { + "text": "ク", + "start": 38.56, + "end": 38.66, + "confidence": 0.752 + }, + { + "text": "ト", + "start": 38.66, + "end": 38.78, + "confidence": 0.731 + }, + { + "text": "リ", + "start": 38.78, + "end": 38.92, + "confidence": 0.988 + }, + { + "text": "ー", + "start": 38.92, + "end": 38.96, + "confidence": 0.528 + }, + { + "text": "ご", + "start": 38.96, + "end": 39.08, + "confidence": 0.051 + }, + { + "text": "と", + "start": 39.08, + "end": 39.24, + "confidence": 0.973 + }, + { + "text": "に", + "start": 39.24, + "end": 39.52, + "confidence": 0.978 + }, + { + "text": "設", + "start": 39.52, + "end": 39.84, + "confidence": 0.621 + }, + { + "text": "定", + "start": 39.84, + "end": 40.04, + "confidence": 0.996 + }, + { + "text": "した", + "start": 40.04, + "end": 40.36, + "confidence": 0.894 + } + ] + } + ], + "language": "ja", + "language_probs": { + "en": 0.006242942530661821, + "zh": 0.0014598396373912692, + "de": 0.00011257075675530359, + "es": 0.0013291992945596576, + "ru": 0.00022562954109162092, + "ko": 0.00042483757715672255, + "fr": 0.0003181886568199843, + "ja": 0.986291229724884, + "pt": 0.0003494618576951325, + "tr": 8.698792953509837e-05, + "pl": 0.0008717101882211864, + "ca": 1.300364374401397e-06, + "nl": 1.030837756843539e-05, + "ar": 1.6217354641412385e-05, + "sv": 1.837667150539346e-05, + "it": 0.00015030168287921697, + "id": 1.4651158380729612e-05, + "hi": 3.514630225254223e-05, + "fi": 3.327585000079125e-05, + "vi": 9.459498869546223e-06, + "he": 1.4766068488825113e-05, + "uk": 2.3967695597093552e-05, + "el": 3.6833007470704615e-05, + "ms": 3.487279172986746e-05, + "cs": 1.2630088349396829e-05, + "ro": 2.0182824300718494e-05, + "da": 7.237617296596e-07, + "hu": 8.413452633249108e-06, + "ta": 3.047150585189229e-06, + "no": 4.935625952384726e-07, + "th": 2.3967695597093552e-05, + "ur": 1.823366073949728e-05, + "hr": 3.243675109843025e-06, + "bg": 5.907170930186112e-07, + "lt": 
1.6532277413716656e-07, + "la": 0.0001827203668653965, + "mi": 8.045069262152538e-05, + "ml": 1.6183295201699366e-06, + "cy": 6.168368418002501e-05, + "sk": 7.948966640469735e-07, + "te": 2.2119941149867373e-06, + "fa": 1.6472740753670223e-05, + "lv": 2.224660988758842e-07, + "bn": 1.0470711458765436e-05, + "sr": 5.13223938014562e-07, + "az": 6.589925192201918e-07, + "sl": 1.0286728411301738e-06, + "kn": 4.974336889063125e-07, + "et": 6.589925192201918e-07, + "mk": 2.2421092182867142e-07, + "br": 3.3016895031323656e-05, + "eu": 3.890341395162977e-05, + "is": 1.7325677958979213e-07, + "hy": 1.6438145848951535e-06, + "ne": 9.149205197900301e-07, + "mn": 2.098681579809636e-05, + "bs": 1.1565679187697242e-06, + "kk": 2.4624750949442387e-07, + "sq": 8.79870412973105e-07, + "sw": 8.886376235750504e-06, + "gl": 4.2394538468215615e-05, + "mr": 5.725426035496639e-07, + "pa": 1.6310223145410419e-06, + "si": 1.4424012988456525e-05, + "km": 4.045314926770516e-05, + "sn": 2.304950794496108e-05, + "yo": 3.3728883863659576e-06, + "so": 2.386712480983988e-07, + "af": 9.970239034373662e-07, + "oc": 5.432133093563607e-06, + "ka": 1.0864949899769272e-06, + "be": 5.692826562153641e-06, + "tg": 1.9062889933252336e-08, + "sd": 1.7092966118070763e-06, + "gu": 3.2116733450493484e-07, + "am": 1.2976332186553918e-07, + "yi": 4.197627276880667e-06, + "lo": 2.5806517101045756e-07, + "uz": 3.42395833907716e-10, + "fo": 1.428170662620687e-06, + "ht": 2.3546556349174352e-06, + "ps": 1.520279852229578e-06, + "tk": 7.731094653351533e-10, + "nn": 0.0008993811788968742, + "mt": 1.0632426494794345e-07, + "sa": 1.7263284462387674e-05, + "lb": 3.039843265995046e-09, + "my": 2.098681579809636e-05, + "bo": 1.3031010894337669e-05, + "tl": 7.029703283478739e-06, + "mg": 1.302323138219208e-09, + "as": 4.321778419580369e-07, + "tt": 3.4580756036461935e-09, + "haw": 2.868557749025058e-05, + "ln": 8.527997579221847e-07, + "ha": 6.360374449343453e-09, + "ba": 5.355172416621201e-10, + "jw": 
0.00018999911844730377, + "su": 3.0877136403262284e-09 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/accurate_jp_japanese.mp3.words.json b/tests/expected/tiny_auto/accurate_jp_japanese.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..76823c4aa18dc37660ed459b7cff3811894956d9 --- /dev/null +++ b/tests/expected/tiny_auto/accurate_jp_japanese.mp3.words.json @@ -0,0 +1,1665 @@ +{ + "text": "いきます ニュースタブでのサイトメイション機が 実際と違う検に関するご質問いただいております同じ度メインでデレクトリーごとに 別再度として管理上をしているサイトバリマスのサプテレクトリーごとにわけたサイトは それぞれ パブリシャーセンターに登録していくぐるニュース上ではサイトとして認識され パブリコンアイコンはサイトボトフのものが 正しく表示されますしかし ルグル検査結果のニュースタブでは パブリコンサイトメイショー機ともに 正しくない医療時にいいなりますこちらいたいです ものではありません パブリコンはサブテレクトリーごとに設定した", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.08, + "end": 6.62, + "text": "いきます ニュースタブでのサイトメイション機が 実際と違う検に関するご質問いただいております", + "tokens": [ + 50364, + 47348, + 15096, + 233, + 26167, + 3384, + 9550, + 12144, + 28889, + 2474, + 2972, + 23607, + 8040, + 7588, + 21647, + 8040, + 43891, + 4824, + 17543, + 5142, + 220, + 33197, + 34837, + 3193, + 49806, + 38739, + 250, + 4108, + 5196, + 95, + 22570, + 9991, + 43450, + 11361, + 32418, + 18549, + 6117, + 19420, + 50712 + ], + "temperature": 0.0, + "avg_logprob": -0.44024292115242253, + "compression_ratio": 1.6122448979591837, + "no_speech_prob": 0.18976172804832458, + "confidence": 0.549, + "words": [ + { + "text": "いきます", + "start": 0.08, + "end": 0.6, + "confidence": 0.173 + }, + { + "text": " ニ", + "start": 0.6, + "end": 1.32, + "confidence": 0.187 + }, + { + "text": "ュ", + "start": 1.32, + "end": 1.38, + "confidence": 0.974 + }, + { + "text": "ー", + "start": 1.38, + "end": 1.5, + "confidence": 0.945 + }, + { + "text": "ス", + "start": 1.5, + "end": 1.52, + "confidence": 0.961 + }, + { + "text": "タ", + "start": 1.52, + "end": 1.68, + "confidence": 0.87 + }, + { + "text": "ブ", + "start": 1.68, + "end": 1.8, + "confidence": 0.772 + }, + { + "text": "で", + "start": 1.8, + "end": 2.02, + "confidence": 
0.948 + }, + { + "text": "の", + "start": 2.02, + "end": 2.26, + "confidence": 0.838 + }, + { + "text": "サ", + "start": 2.26, + "end": 2.56, + "confidence": 0.015 + }, + { + "text": "イ", + "start": 2.56, + "end": 2.66, + "confidence": 0.983 + }, + { + "text": "ト", + "start": 2.66, + "end": 2.76, + "confidence": 0.992 + }, + { + "text": "メ", + "start": 2.76, + "end": 2.94, + "confidence": 0.386 + }, + { + "text": "イ", + "start": 2.94, + "end": 3.0, + "confidence": 0.234 + }, + { + "text": "ショ", + "start": 3.0, + "end": 3.14, + "confidence": 0.894 + }, + { + "text": "ン", + "start": 3.14, + "end": 3.2, + "confidence": 0.983 + }, + { + "text": "機", + "start": 3.2, + "end": 3.32, + "confidence": 0.363 + }, + { + "text": "が", + "start": 3.32, + "end": 3.5, + "confidence": 0.64 + }, + { + "text": " ", + "start": 3.5, + "end": 3.6, + "confidence": 0.232 + }, + { + "text": "実", + "start": 3.6, + "end": 3.76, + "confidence": 0.751 + }, + { + "text": "際", + "start": 3.76, + "end": 3.9, + "confidence": 0.98 + }, + { + "text": "と", + "start": 3.9, + "end": 4.08, + "confidence": 0.928 + }, + { + "text": "違う", + "start": 4.08, + "end": 4.38, + "confidence": 0.898 + }, + { + "text": "検", + "start": 4.38, + "end": 4.64, + "confidence": 0.327 + }, + { + "text": "に", + "start": 4.64, + "end": 4.74, + "confidence": 0.187 + }, + { + "text": "関", + "start": 4.74, + "end": 4.94, + "confidence": 0.497 + }, + { + "text": "する", + "start": 4.94, + "end": 5.12, + "confidence": 0.988 + }, + { + "text": "ご", + "start": 5.12, + "end": 5.3, + "confidence": 0.649 + }, + { + "text": "質", + "start": 5.3, + "end": 5.46, + "confidence": 0.894 + }, + { + "text": "問", + "start": 5.46, + "end": 5.62, + "confidence": 0.992 + }, + { + "text": "いただ", + "start": 5.62, + "end": 5.92, + "confidence": 0.936 + }, + { + "text": "いて", + "start": 5.92, + "end": 6.14, + "confidence": 0.981 + }, + { + "text": "お", + "start": 6.14, + "end": 6.26, + "confidence": 0.817 + }, + { + "text": "ります", + "start": 6.26, + "end": 
6.62, + "confidence": 0.971 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.94, + "end": 13.58, + "text": "同じ度メインでデレクトリーごとに 別再度として管理上をしているサイトバリマスの", + "tokens": [ + 50712, + 13089, + 9257, + 13127, + 21647, + 8040, + 4824, + 2474, + 31327, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 220, + 16158, + 8623, + 13127, + 3193, + 8822, + 23131, + 13876, + 5708, + 5998, + 8822, + 22979, + 23607, + 8040, + 7588, + 18593, + 12376, + 13258, + 9550, + 2972, + 51044 + ], + "temperature": 0.0, + "avg_logprob": -0.44024292115242253, + "compression_ratio": 1.6122448979591837, + "no_speech_prob": 0.18976172804832458, + "confidence": 0.525, + "words": [ + { + "text": "同", + "start": 6.94, + "end": 7.28, + "confidence": 0.469 + }, + { + "text": "じ", + "start": 7.28, + "end": 7.44, + "confidence": 0.218 + }, + { + "text": "度", + "start": 7.44, + "end": 7.62, + "confidence": 0.139 + }, + { + "text": "メ", + "start": 7.62, + "end": 7.74, + "confidence": 0.209 + }, + { + "text": "イ", + "start": 7.74, + "end": 7.82, + "confidence": 0.904 + }, + { + "text": "ン", + "start": 7.82, + "end": 7.84, + "confidence": 0.911 + }, + { + "text": "で", + "start": 7.84, + "end": 8.0, + "confidence": 0.979 + }, + { + "text": "デ", + "start": 8.0, + "end": 8.14, + "confidence": 0.37 + }, + { + "text": "レ", + "start": 8.14, + "end": 8.28, + "confidence": 0.674 + }, + { + "text": "ク", + "start": 8.28, + "end": 8.36, + "confidence": 0.974 + }, + { + "text": "ト", + "start": 8.36, + "end": 8.5, + "confidence": 0.979 + }, + { + "text": "リ", + "start": 8.5, + "end": 8.62, + "confidence": 0.96 + }, + { + "text": "ー", + "start": 8.62, + "end": 8.72, + "confidence": 0.701 + }, + { + "text": "ご", + "start": 8.72, + "end": 8.86, + "confidence": 0.267 + }, + { + "text": "と", + "start": 8.86, + "end": 9.0, + "confidence": 0.993 + }, + { + "text": "に", + "start": 9.0, + "end": 9.14, + "confidence": 0.967 + }, + { + "text": " ", + "start": 9.14, + "end": 9.34, + "confidence": 0.12 + }, + { + 
"text": "別", + "start": 9.34, + "end": 9.4, + "confidence": 0.527 + }, + { + "text": "再", + "start": 9.4, + "end": 9.58, + "confidence": 0.342 + }, + { + "text": "度", + "start": 9.58, + "end": 9.74, + "confidence": 0.323 + }, + { + "text": "と", + "start": 9.74, + "end": 9.92, + "confidence": 0.891 + }, + { + "text": "して", + "start": 9.92, + "end": 10.4, + "confidence": 0.998 + }, + { + "text": "管", + "start": 10.4, + "end": 10.86, + "confidence": 0.418 + }, + { + "text": "理", + "start": 10.86, + "end": 11.08, + "confidence": 1.0 + }, + { + "text": "上", + "start": 11.08, + "end": 11.36, + "confidence": 0.472 + }, + { + "text": "を", + "start": 11.36, + "end": 11.56, + "confidence": 0.987 + }, + { + "text": "して", + "start": 11.56, + "end": 11.84, + "confidence": 0.931 + }, + { + "text": "いる", + "start": 11.84, + "end": 12.16, + "confidence": 0.939 + }, + { + "text": "サ", + "start": 12.16, + "end": 12.52, + "confidence": 0.069 + }, + { + "text": "イ", + "start": 12.52, + "end": 12.58, + "confidence": 0.985 + }, + { + "text": "ト", + "start": 12.58, + "end": 12.7, + "confidence": 0.976 + }, + { + "text": "バ", + "start": 12.7, + "end": 12.82, + "confidence": 0.297 + }, + { + "text": "リ", + "start": 12.82, + "end": 12.96, + "confidence": 0.362 + }, + { + "text": "マ", + "start": 12.96, + "end": 13.12, + "confidence": 0.19 + }, + { + "text": "ス", + "start": 13.12, + "end": 13.22, + "confidence": 0.859 + }, + { + "text": "の", + "start": 13.22, + "end": 13.58, + "confidence": 0.7 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 13.68, + "end": 20.74, + "text": "サプテレクトリーごとにわけたサイトは それぞれ パブリシャーセンターに登録していくぐるニュース上では", + "tokens": [ + 51044, + 23607, + 20953, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 9206, + 7625, + 3368, + 23607, + 8040, + 7588, + 3065, + 47765, + 31563, + 4132, + 15096, + 239, + 28889, + 12376, + 11054, + 17233, + 3384, + 31223, + 4824, + 30736, + 4108, + 46246, + 8822, + 49394, + 35849, + 4895, + 34737, + 26167, + 3384, + 
9550, + 5708, + 16719, + 51408 + ], + "temperature": 0.0, + "avg_logprob": -0.44024292115242253, + "compression_ratio": 1.6122448979591837, + "no_speech_prob": 0.18976172804832458, + "confidence": 0.622, + "words": [ + { + "text": "サ", + "start": 13.68, + "end": 14.22, + "confidence": 0.398 + }, + { + "text": "プ", + "start": 14.22, + "end": 14.32, + "confidence": 0.533 + }, + { + "text": "テ", + "start": 14.32, + "end": 14.42, + "confidence": 0.637 + }, + { + "text": "レ", + "start": 14.42, + "end": 14.58, + "confidence": 0.825 + }, + { + "text": "ク", + "start": 14.58, + "end": 14.66, + "confidence": 0.474 + }, + { + "text": "ト", + "start": 14.66, + "end": 14.78, + "confidence": 0.973 + }, + { + "text": "リ", + "start": 14.78, + "end": 14.92, + "confidence": 0.873 + }, + { + "text": "ー", + "start": 14.92, + "end": 15.04, + "confidence": 0.917 + }, + { + "text": "ご", + "start": 15.04, + "end": 15.1, + "confidence": 0.016 + }, + { + "text": "と", + "start": 15.1, + "end": 15.38, + "confidence": 0.991 + }, + { + "text": "に", + "start": 15.38, + "end": 15.86, + "confidence": 0.493 + }, + { + "text": "わ", + "start": 15.86, + "end": 16.02, + "confidence": 0.756 + }, + { + "text": "け", + "start": 16.02, + "end": 16.14, + "confidence": 0.988 + }, + { + "text": "た", + "start": 16.14, + "end": 16.28, + "confidence": 0.951 + }, + { + "text": "サ", + "start": 16.28, + "end": 16.42, + "confidence": 0.802 + }, + { + "text": "イ", + "start": 16.42, + "end": 16.5, + "confidence": 0.981 + }, + { + "text": "ト", + "start": 16.5, + "end": 16.6, + "confidence": 0.904 + }, + { + "text": "は", + "start": 16.6, + "end": 17.08, + "confidence": 0.976 + }, + { + "text": " それ", + "start": 17.08, + "end": 17.44, + "confidence": 0.128 + }, + { + "text": "ぞ", + "start": 17.44, + "end": 17.6, + "confidence": 0.85 + }, + { + "text": "れ", + "start": 17.6, + "end": 17.72, + "confidence": 0.995 + }, + { + "text": " パ", + "start": 17.72, + "end": 17.86, + "confidence": 0.178 + }, + { + "text": "ブ", + 
"start": 17.86, + "end": 17.94, + "confidence": 0.941 + }, + { + "text": "リ", + "start": 17.94, + "end": 18.04, + "confidence": 0.991 + }, + { + "text": "シ", + "start": 18.04, + "end": 18.18, + "confidence": 0.755 + }, + { + "text": "ャ", + "start": 18.18, + "end": 18.26, + "confidence": 0.825 + }, + { + "text": "ー", + "start": 18.26, + "end": 18.42, + "confidence": 0.765 + }, + { + "text": "セ", + "start": 18.42, + "end": 18.44, + "confidence": 0.588 + }, + { + "text": "ン", + "start": 18.44, + "end": 18.52, + "confidence": 0.832 + }, + { + "text": "ター", + "start": 18.52, + "end": 18.6, + "confidence": 0.998 + }, + { + "text": "に", + "start": 18.6, + "end": 18.76, + "confidence": 0.858 + }, + { + "text": "登録", + "start": 18.76, + "end": 19.12, + "confidence": 0.902 + }, + { + "text": "して", + "start": 19.12, + "end": 19.38, + "confidence": 0.766 + }, + { + "text": "いく", + "start": 19.38, + "end": 19.62, + "confidence": 0.877 + }, + { + "text": "ぐ", + "start": 19.62, + "end": 19.78, + "confidence": 0.221 + }, + { + "text": "る", + "start": 19.78, + "end": 19.9, + "confidence": 0.85 + }, + { + "text": "ニ", + "start": 19.9, + "end": 20.02, + "confidence": 0.68 + }, + { + "text": "ュ", + "start": 20.02, + "end": 20.14, + "confidence": 0.996 + }, + { + "text": "ー", + "start": 20.14, + "end": 20.16, + "confidence": 0.989 + }, + { + "text": "ス", + "start": 20.16, + "end": 20.22, + "confidence": 0.982 + }, + { + "text": "上", + "start": 20.22, + "end": 20.4, + "confidence": 0.236 + }, + { + "text": "では", + "start": 20.4, + "end": 20.74, + "confidence": 0.763 + } + ] + }, + { + "id": 3, + "seek": 2088, + "start": 20.9, + "end": 27.0, + "text": "サイトとして認識され パブリコンアイコンはサイトボトフのものが 正しく表示されます", + "tokens": [ + 50364, + 23607, + 8040, + 7588, + 3193, + 8822, + 22041, + 43143, + 6722, + 4132, + 15096, + 239, + 28889, + 12376, + 18066, + 4824, + 12817, + 8040, + 18066, + 4824, + 3065, + 23607, + 8040, + 7588, + 37626, + 7588, + 17320, + 2972, + 44726, + 5142, + 220, + 15789, + 26568, + 
40053, + 6722, + 4132, + 5368, + 50684 + ], + "temperature": 0.0, + "avg_logprob": -0.35092457005235017, + "compression_ratio": 1.6926406926406927, + "no_speech_prob": 0.06554599106311798, + "confidence": 0.389, + "words": [ + { + "text": "サ", + "start": 20.9, + "end": 21.26, + "confidence": 0.002 + }, + { + "text": "イ", + "start": 21.26, + "end": 21.34, + "confidence": 0.939 + }, + { + "text": "ト", + "start": 21.34, + "end": 21.44, + "confidence": 0.379 + }, + { + "text": "と", + "start": 21.44, + "end": 21.58, + "confidence": 0.832 + }, + { + "text": "して", + "start": 21.58, + "end": 21.8, + "confidence": 0.981 + }, + { + "text": "認", + "start": 21.8, + "end": 22.08, + "confidence": 0.179 + }, + { + "text": "識", + "start": 22.08, + "end": 22.2, + "confidence": 0.986 + }, + { + "text": "さ", + "start": 22.2, + "end": 22.34, + "confidence": 0.982 + }, + { + "text": "れ", + "start": 22.34, + "end": 22.64, + "confidence": 0.948 + }, + { + "text": " パ", + "start": 22.64, + "end": 22.86, + "confidence": 0.141 + }, + { + "text": "ブ", + "start": 22.86, + "end": 23.0, + "confidence": 0.051 + }, + { + "text": "リ", + "start": 23.0, + "end": 23.18, + "confidence": 0.007 + }, + { + "text": "コ", + "start": 23.18, + "end": 23.22, + "confidence": 0.817 + }, + { + "text": "ン", + "start": 23.22, + "end": 23.3, + "confidence": 0.887 + }, + { + "text": "ア", + "start": 23.3, + "end": 23.46, + "confidence": 0.107 + }, + { + "text": "イ", + "start": 23.46, + "end": 23.52, + "confidence": 0.957 + }, + { + "text": "コ", + "start": 23.52, + "end": 23.64, + "confidence": 0.309 + }, + { + "text": "ン", + "start": 23.64, + "end": 23.86, + "confidence": 0.893 + }, + { + "text": "は", + "start": 23.86, + "end": 23.88, + "confidence": 0.834 + }, + { + "text": "サ", + "start": 23.88, + "end": 24.04, + "confidence": 0.642 + }, + { + "text": "イ", + "start": 24.04, + "end": 24.16, + "confidence": 0.997 + }, + { + "text": "ト", + "start": 24.16, + "end": 24.24, + "confidence": 0.989 + }, + { + "text": "ボ", + 
"start": 24.24, + "end": 24.38, + "confidence": 0.587 + }, + { + "text": "ト", + "start": 24.38, + "end": 24.5, + "confidence": 0.861 + }, + { + "text": "フ", + "start": 24.5, + "end": 24.66, + "confidence": 0.037 + }, + { + "text": "の", + "start": 24.66, + "end": 24.72, + "confidence": 0.659 + }, + { + "text": "もの", + "start": 24.72, + "end": 24.9, + "confidence": 0.502 + }, + { + "text": "が", + "start": 24.9, + "end": 25.08, + "confidence": 0.339 + }, + { + "text": " ", + "start": 25.08, + "end": 25.32, + "confidence": 0.283 + }, + { + "text": "正", + "start": 25.32, + "end": 25.4, + "confidence": 0.631 + }, + { + "text": "しく", + "start": 25.4, + "end": 25.64, + "confidence": 0.979 + }, + { + "text": "表示", + "start": 25.64, + "end": 26.0, + "confidence": 0.665 + }, + { + "text": "さ", + "start": 26.0, + "end": 26.28, + "confidence": 0.993 + }, + { + "text": "れ", + "start": 26.28, + "end": 26.6, + "confidence": 0.986 + }, + { + "text": "ます", + "start": 26.6, + "end": 27.0, + "confidence": 0.888 + } + ] + }, + { + "id": 4, + "seek": 2088, + "start": 27.28, + "end": 33.8, + "text": "しかし ルグル検査結果のニュースタブでは パブリコンサイトメイショー機ともに 正しくない医療時にいいなります", + "tokens": [ + 50684, + 32156, + 2849, + 220, + 9405, + 23839, + 9405, + 38739, + 250, + 17238, + 119, + 35181, + 2972, + 34737, + 26167, + 3384, + 9550, + 12144, + 28889, + 16719, + 15096, + 239, + 28889, + 12376, + 18066, + 4824, + 23607, + 8040, + 7588, + 21647, + 8040, + 43891, + 3384, + 17543, + 3193, + 4801, + 4108, + 220, + 15789, + 26568, + 9311, + 9937, + 119, + 6651, + 224, + 6611, + 4108, + 13806, + 3203, + 19420, + 51024 + ], + "temperature": 0.0, + "avg_logprob": -0.35092457005235017, + "compression_ratio": 1.6926406926406927, + "no_speech_prob": 0.06554599106311798, + "confidence": 0.502, + "words": [ + { + "text": "しか", + "start": 27.28, + "end": 27.58, + "confidence": 0.909 + }, + { + "text": "し", + "start": 27.58, + "end": 27.72, + "confidence": 0.989 + }, + { + "text": " ", + "start": 27.72, + "end": 27.86, + 
"confidence": 0.065 + }, + { + "text": "ル", + "start": 27.86, + "end": 27.9, + "confidence": 0.236 + }, + { + "text": "グ", + "start": 27.9, + "end": 28.0, + "confidence": 0.538 + }, + { + "text": "ル", + "start": 28.0, + "end": 28.14, + "confidence": 0.989 + }, + { + "text": "検", + "start": 28.14, + "end": 28.32, + "confidence": 0.565 + }, + { + "text": "査", + "start": 28.32, + "end": 28.46, + "confidence": 0.728 + }, + { + "text": "結果", + "start": 28.46, + "end": 28.72, + "confidence": 0.977 + }, + { + "text": "の", + "start": 28.72, + "end": 28.92, + "confidence": 0.96 + }, + { + "text": "ニ", + "start": 28.92, + "end": 29.06, + "confidence": 0.777 + }, + { + "text": "ュ", + "start": 29.06, + "end": 29.14, + "confidence": 0.995 + }, + { + "text": "ー", + "start": 29.14, + "end": 29.2, + "confidence": 0.98 + }, + { + "text": "ス", + "start": 29.2, + "end": 29.26, + "confidence": 0.971 + }, + { + "text": "タ", + "start": 29.26, + "end": 29.34, + "confidence": 0.806 + }, + { + "text": "ブ", + "start": 29.34, + "end": 29.44, + "confidence": 0.685 + }, + { + "text": "では", + "start": 29.44, + "end": 29.68, + "confidence": 0.942 + }, + { + "text": " パ", + "start": 29.68, + "end": 30.02, + "confidence": 0.123 + }, + { + "text": "ブ", + "start": 30.02, + "end": 30.14, + "confidence": 0.041 + }, + { + "text": "リ", + "start": 30.14, + "end": 30.32, + "confidence": 0.021 + }, + { + "text": "コ", + "start": 30.32, + "end": 30.34, + "confidence": 0.89 + }, + { + "text": "ン", + "start": 30.34, + "end": 30.42, + "confidence": 0.918 + }, + { + "text": "サ", + "start": 30.42, + "end": 30.54, + "confidence": 0.426 + }, + { + "text": "イ", + "start": 30.54, + "end": 30.62, + "confidence": 0.979 + }, + { + "text": "ト", + "start": 30.62, + "end": 30.72, + "confidence": 0.956 + }, + { + "text": "メ", + "start": 30.72, + "end": 30.86, + "confidence": 0.251 + }, + { + "text": "イ", + "start": 30.86, + "end": 30.92, + "confidence": 0.622 + }, + { + "text": "ショ", + "start": 30.92, + "end": 31.02, + 
"confidence": 0.416 + }, + { + "text": "ー", + "start": 31.02, + "end": 31.08, + "confidence": 0.617 + }, + { + "text": "機", + "start": 31.08, + "end": 31.24, + "confidence": 0.126 + }, + { + "text": "と", + "start": 31.24, + "end": 31.42, + "confidence": 0.357 + }, + { + "text": "も", + "start": 31.42, + "end": 31.54, + "confidence": 0.81 + }, + { + "text": "に", + "start": 31.54, + "end": 31.66, + "confidence": 0.919 + }, + { + "text": " ", + "start": 31.66, + "end": 31.8, + "confidence": 0.402 + }, + { + "text": "正", + "start": 31.8, + "end": 31.96, + "confidence": 0.437 + }, + { + "text": "しく", + "start": 31.96, + "end": 32.18, + "confidence": 0.998 + }, + { + "text": "ない", + "start": 32.18, + "end": 32.4, + "confidence": 0.981 + }, + { + "text": "医", + "start": 32.4, + "end": 32.6, + "confidence": 0.234 + }, + { + "text": "療", + "start": 32.6, + "end": 32.76, + "confidence": 0.686 + }, + { + "text": "時", + "start": 32.76, + "end": 32.96, + "confidence": 0.452 + }, + { + "text": "に", + "start": 32.96, + "end": 33.12, + "confidence": 0.925 + }, + { + "text": "いい", + "start": 33.12, + "end": 33.36, + "confidence": 0.577 + }, + { + "text": "な", + "start": 33.36, + "end": 33.48, + "confidence": 0.807 + }, + { + "text": "ります", + "start": 33.48, + "end": 33.8, + "confidence": 0.941 + } + ] + }, + { + "id": 5, + "seek": 2088, + "start": 34.04, + "end": 40.36, + "text": "こちらいたいです ものではありません パブリコンはサブテレクトリーごとに設定した", + "tokens": [ + 51024, + 34395, + 17679, + 1764, + 4767, + 32505, + 2972, + 16719, + 14498, + 30250, + 15096, + 239, + 28889, + 12376, + 18066, + 4824, + 3065, + 23607, + 28889, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 39035, + 12088, + 8533, + 51388 + ], + "temperature": 0.0, + "avg_logprob": -0.35092457005235017, + "compression_ratio": 1.6926406926406927, + "no_speech_prob": 0.06554599106311798, + "confidence": 0.364, + "words": [ + { + "text": "こちら", + "start": 34.04, + "end": 34.54, + "confidence": 0.408 + }, + { + "text": 
"いた", + "start": 34.54, + "end": 34.82, + "confidence": 0.255 + }, + { + "text": "い", + "start": 34.82, + "end": 34.98, + "confidence": 0.11 + }, + { + "text": "です", + "start": 34.98, + "end": 35.04, + "confidence": 0.021 + }, + { + "text": " も", + "start": 35.04, + "end": 35.18, + "confidence": 0.009 + }, + { + "text": "の", + "start": 35.18, + "end": 35.28, + "confidence": 0.812 + }, + { + "text": "では", + "start": 35.28, + "end": 35.5, + "confidence": 0.995 + }, + { + "text": "あり", + "start": 35.5, + "end": 35.72, + "confidence": 0.981 + }, + { + "text": "ません", + "start": 35.72, + "end": 36.18, + "confidence": 0.999 + }, + { + "text": " パ", + "start": 36.18, + "end": 36.9, + "confidence": 0.273 + }, + { + "text": "ブ", + "start": 36.9, + "end": 37.02, + "confidence": 0.011 + }, + { + "text": "リ", + "start": 37.02, + "end": 37.1, + "confidence": 0.237 + }, + { + "text": "コ", + "start": 37.1, + "end": 37.18, + "confidence": 0.951 + }, + { + "text": "ン", + "start": 37.18, + "end": 37.84, + "confidence": 0.832 + }, + { + "text": "は", + "start": 37.84, + "end": 37.86, + "confidence": 0.95 + }, + { + "text": "サ", + "start": 37.86, + "end": 38.22, + "confidence": 0.307 + }, + { + "text": "ブ", + "start": 38.22, + "end": 38.32, + "confidence": 0.98 + }, + { + "text": "テ", + "start": 38.32, + "end": 38.42, + "confidence": 0.317 + }, + { + "text": "レ", + "start": 38.42, + "end": 38.56, + "confidence": 0.394 + }, + { + "text": "ク", + "start": 38.56, + "end": 38.66, + "confidence": 0.752 + }, + { + "text": "ト", + "start": 38.66, + "end": 38.78, + "confidence": 0.731 + }, + { + "text": "リ", + "start": 38.78, + "end": 38.92, + "confidence": 0.988 + }, + { + "text": "ー", + "start": 38.92, + "end": 38.96, + "confidence": 0.528 + }, + { + "text": "ご", + "start": 38.96, + "end": 39.08, + "confidence": 0.051 + }, + { + "text": "と", + "start": 39.08, + "end": 39.24, + "confidence": 0.973 + }, + { + "text": "に", + "start": 39.24, + "end": 39.52, + "confidence": 0.978 + }, + { + "text": 
"設", + "start": 39.52, + "end": 39.84, + "confidence": 0.621 + }, + { + "text": "定", + "start": 39.84, + "end": 40.04, + "confidence": 0.996 + }, + { + "text": "した", + "start": 40.04, + "end": 40.36, + "confidence": 0.894 + } + ] + } + ], + "language": "Japanese" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/bonjour.wav.words.json b/tests/expected/tiny_auto/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..684b5c7abd45c20f9b86718656bf99a47a5a1027 --- /dev/null +++ b/tests/expected/tiny_auto/bonjour.wav.words.json @@ -0,0 +1,133 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.96, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50414 + ], + "temperature": 0.0, + "avg_logprob": -0.698755931854248, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.019103480502963066, + "confidence": 0.828, + "words": [ + { + "text": "Bonjour !", + "start": 0.14, + "end": 0.96, + "confidence": 0.828 + } + ] + } + ], + "language": "fr", + "language_probs": { + "en": 0.007303962018340826, + "zh": 0.04105774685740471, + "de": 0.0015919815050438046, + "es": 0.0021759807132184505, + "ru": 0.10402871668338776, + "ko": 0.006650333292782307, + "fr": 0.405062735080719, + "ja": 0.0019202962284907699, + "pt": 0.3331955373287201, + "tr": 0.007361247204244137, + "pl": 0.053132668137550354, + "ca": 6.393653893610463e-05, + "nl": 0.0077145216055214405, + "ar": 0.0014608843484893441, + "sv": 0.006150552537292242, + "it": 0.0026043078396469355, + "id": 0.0002558547130320221, + "hi": 0.00036081296275369823, + "fi": 5.5984699429245666e-05, + "vi": 0.0012016906403005123, + "he": 0.00032852397998794913, + "uk": 0.001734843128360808, + "el": 0.00019087926193606108, + "ms": 0.0005948800244368613, + "cs": 0.000711978180333972, + "ro": 0.0009000247810035944, + "da": 0.0001005862868623808, + "hu": 8.842102397466078e-05, + "ta": 7.971397280925885e-06, + "no": 
0.00037812875234521925, + "th": 5.1174160034861416e-05, + "ur": 0.00023205013712868094, + "hr": 0.00042514156666584313, + "bg": 0.00043183661182411015, + "lt": 0.00013856348232366145, + "la": 0.000879175728186965, + "mi": 5.5984699429245666e-05, + "ml": 3.6567064398695948e-06, + "cy": 0.0005009392625652254, + "sk": 0.0006382130668498576, + "te": 1.9499542759149335e-05, + "fa": 0.00011577410623431206, + "lv": 2.0919955204590224e-05, + "bn": 3.544730498106219e-05, + "sr": 0.00010097998892888427, + "az": 3.265558916609734e-05, + "sl": 0.000493172905407846, + "kn": 1.1439115041866899e-06, + "et": 3.89254364563385e-06, + "mk": 4.1767136281123385e-05, + "br": 0.0016683823196217418, + "eu": 5.5116739531513304e-05, + "is": 9.46626823861152e-06, + "hy": 4.233576873957645e-06, + "ne": 3.3035587421181845e-06, + "mn": 3.558604294084944e-05, + "bs": 0.0002639764279592782, + "kk": 1.3091374057694338e-05, + "sq": 0.00016977150517050177, + "sw": 3.0557486752513796e-05, + "gl": 0.00019312926451675594, + "mr": 2.1630326330068783e-07, + "pa": 2.83674307866022e-06, + "si": 2.837166903191246e-05, + "km": 0.00015277782222256064, + "sn": 0.000281001441180706, + "yo": 0.00036364287370815873, + "so": 8.303907748086203e-07, + "af": 2.2182357497513294e-05, + "oc": 0.00042183310142718256, + "ka": 1.5604907730448758e-06, + "be": 0.00017311997362412512, + "tg": 2.0730703909066506e-07, + "sd": 3.5309112718096e-05, + "gu": 8.567501481593354e-07, + "am": 2.6648738185031107e-06, + "yi": 2.8594189643627033e-05, + "lo": 1.2938854524691124e-05, + "uz": 1.4941893278219709e-09, + "fo": 5.8901099691865966e-05, + "ht": 0.0001786153734428808, + "ps": 8.997561053547543e-06, + "tk": 1.2832416018682125e-08, + "nn": 0.0024085910990834236, + "mt": 8.09692846814869e-06, + "sa": 5.5766442528693005e-05, + "lb": 6.159483234569052e-08, + "my": 3.847768675768748e-05, + "bo": 9.087214129976928e-05, + "tl": 6.557092092407402e-06, + "mg": 1.5002612485659483e-08, + "as": 2.3471538952435367e-06, + "tt": 
1.3769226825388614e-07, + "haw": 0.0002518880646675825, + "ln": 2.1584026399068534e-05, + "ha": 7.045330541188832e-09, + "ba": 1.9039308796209298e-08, + "jw": 0.00028542656218633056, + "su": 9.29715771036399e-09 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny_auto/bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..169d38901f5f7c7bfce418ba495073fee06b134d --- /dev/null +++ b/tests/expected/tiny_auto/bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,255 @@ +{ + "text": " Боже улыл! Эскому зарегиан! Боже улыл! Эскому зарегиан!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.44, + "end": 1.88, + "text": " Боже улыл!", + "tokens": [ + 50364, + 5697, + 9292, + 1595, + 30975, + 693, + 0, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.9444295983565482, + "compression_ratio": 0.8166666666666667, + "no_speech_prob": 0.04268376901745796, + "confidence": 0.317, + "words": [ + { + "text": "Боже", + "start": 0.44, + "end": 0.86, + "confidence": 0.422 + }, + { + "text": "улыл!", + "start": 0.86, + "end": 1.88, + "confidence": 0.261 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.88, + "end": 3.14, + "text": " Эскому зарегиан!", + "tokens": [ + 50464, + 5381, + 4218, + 4161, + 17821, + 4953, + 435, + 1416, + 0, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.9444295983565482, + "compression_ratio": 0.8166666666666667, + "no_speech_prob": 0.04268376901745796, + "confidence": 0.396, + "words": [ + { + "text": "Эскому", + "start": 1.88, + "end": 2.3, + "confidence": 0.287 + }, + { + "text": "зарегиан!", + "start": 2.3, + "end": 3.14, + "confidence": 0.505 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 32.98, + "end": 34.02, + "text": " Боже улыл!", + "tokens": [ + 50364, + 5697, + 9292, + 1595, + 30975, + 693, + 0, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.30732538825587224, + 
"compression_ratio": 0.8166666666666667, + "no_speech_prob": 0.22181619703769684, + "confidence": 0.577, + "words": [ + { + "text": "Боже", + "start": 32.98, + "end": 33.36, + "confidence": 0.444 + }, + { + "text": "улыл!", + "start": 33.36, + "end": 34.02, + "confidence": 0.686 + } + ] + }, + { + "id": 3, + "seek": 3000, + "start": 34.42, + "end": 35.72, + "text": " Эскому зарегиан!", + "tokens": [ + 50564, + 5381, + 4218, + 4161, + 17821, + 4953, + 435, + 1416, + 0, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.30732538825587224, + "compression_ratio": 0.8166666666666667, + "no_speech_prob": 0.22181619703769684, + "confidence": 0.849, + "words": [ + { + "text": "Эскому", + "start": 34.42, + "end": 34.84, + "confidence": 0.69 + }, + { + "text": "зарегиан!", + "start": 34.84, + "end": 35.72, + "confidence": 0.992 + } + ] + } + ], + "language": "ru", + "language_probs": { + "en": 0.050850968807935715, + "zh": 0.000681403384078294, + "de": 0.0007310390938073397, + "es": 0.01434317510575056, + "ru": 0.5135765671730042, + "ko": 0.0008348728879354894, + "fr": 0.021197838708758354, + "ja": 0.0015719811199232936, + "pt": 0.20269688963890076, + "tr": 0.0052354964427649975, + "pl": 0.10598251223564148, + "ca": 0.00012216888717375696, + "nl": 0.008631874807178974, + "ar": 0.000931367976590991, + "sv": 0.0007842904306016862, + "it": 0.0003548910026438534, + "id": 0.00036189061938785017, + "hi": 3.1131399737205356e-05, + "fi": 7.015569281065837e-05, + "vi": 3.69694535038434e-05, + "he": 0.00010449659021105617, + "uk": 0.004842042922973633, + "el": 0.004880018997937441, + "ms": 0.00024969701189547777, + "cs": 0.0017952588386833668, + "ro": 0.0008414208423346281, + "da": 1.0758723874459974e-05, + "hu": 0.0007601603865623474, + "ta": 3.681982661873917e-06, + "no": 0.00013261307321954519, + "th": 1.203564124807599e-06, + "ur": 0.0001061421717167832, + "hr": 0.0008414208423346281, + "bg": 0.007558321580290794, + "lt": 0.0001061421717167832, + "la": 0.002434731926769018, + 
"mi": 6.275507985264994e-06, + "ml": 4.9546488298801705e-06, + "cy": 0.0004521428782027215, + "sk": 0.0019110430730506778, + "te": 1.4086941519053653e-05, + "fa": 6.338038656394929e-05, + "lv": 9.66435472946614e-05, + "bn": 2.2248606910579838e-05, + "sr": 0.00021525226475205272, + "az": 5.379023787099868e-05, + "sl": 0.0004965817788615823, + "kn": 5.735422519137501e-07, + "et": 3.4928464174299734e-06, + "mk": 0.0002401312522124499, + "br": 0.002695002593100071, + "eu": 4.710030407295562e-05, + "is": 5.485115616465919e-05, + "hy": 4.196751433482859e-06, + "ne": 1.044653913595539e-06, + "mn": 1.002823410090059e-05, + "bs": 0.0004433976428117603, + "kk": 1.2286848686926533e-05, + "sq": 0.00021864200243726373, + "sw": 1.3494463019014802e-05, + "gl": 0.00066561863059178, + "mr": 8.312456998282869e-07, + "pa": 1.8514169823902193e-06, + "si": 0.00010490557906450704, + "km": 0.00046832123189233243, + "sn": 0.0001612164924154058, + "yo": 0.00029421495855785906, + "so": 4.19202905277416e-07, + "af": 1.9179498849553056e-05, + "oc": 0.0002689342654775828, + "ka": 6.563843271578662e-06, + "be": 0.00028965360252186656, + "tg": 3.0341843171299843e-07, + "sd": 1.2977498954569455e-05, + "gu": 2.2012378053659631e-07, + "am": 1.0405813100078376e-06, + "yi": 0.0001417183957528323, + "lo": 9.465352377446834e-07, + "uz": 1.9853909627443045e-09, + "fo": 8.411696398979984e-06, + "ht": 4.673376679420471e-05, + "ps": 3.85115527024027e-06, + "tk": 1.2043804176187223e-08, + "nn": 0.03361043706536293, + "mt": 1.2383216017042287e-05, + "sa": 6.826336175436154e-05, + "lb": 8.868869372236077e-08, + "my": 2.013623316088342e-06, + "bo": 4.737001745525049e-06, + "tl": 7.4088115979975555e-06, + "mg": 8.133345552607807e-09, + "as": 1.8805726540449541e-06, + "tt": 9.040726354214712e-07, + "haw": 0.0020342946518212557, + "ln": 7.251335773617029e-06, + "ha": 3.7309964007192775e-09, + "ba": 1.685231154624489e-08, + "jw": 0.0009760652319528162, + "su": 1.2209596000900547e-08 + } +} \ No newline at end of 
file diff --git a/tests/expected/tiny_auto/empty.mp3.words.json b/tests/expected/tiny_auto/empty.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f28641759ddd0498422bc0f176ef04816940492a --- /dev/null +++ b/tests/expected/tiny_auto/empty.mp3.words.json @@ -0,0 +1,106 @@ +{ + "text": "", + "segments": [], + "language": "en", + "language_probs": { + "en": 0.4808807075023651, + "zh": 0.018213840201497078, + "de": 0.013016844168305397, + "es": 0.011944927275180817, + "ru": 0.018500667065382004, + "ko": 0.051081977784633636, + "fr": 0.020003991201519966, + "ja": 0.0640711560845375, + "pt": 0.009598019532859325, + "tr": 0.0021584045607596636, + "pl": 0.016583889722824097, + "ca": 0.00040873297257348895, + "nl": 0.008274009451270103, + "ar": 0.0019806630443781614, + "sv": 0.0016941509675234556, + "it": 0.007833674550056458, + "id": 0.0019499557092785835, + "hi": 0.0014045004500076175, + "fi": 0.0012688604183495045, + "vi": 0.0008128625340759754, + "he": 0.0004853823338635266, + "uk": 0.000628131499979645, + "el": 0.0008192379609681666, + "ms": 0.0028371689841151237, + "cs": 0.0010437361197546124, + "ro": 0.0007517749909311533, + "da": 0.00025181309320032597, + "hu": 0.0009652981534600258, + "ta": 0.0002222242474090308, + "no": 0.000294399302219972, + "th": 0.0009804993169382215, + "ur": 0.00014919429668225348, + "hr": 0.00012861353752668947, + "bg": 0.00039001571713015437, + "lt": 2.3605967726325616e-05, + "la": 0.015948571264743805, + "mi": 0.0006480705342255533, + "ml": 0.0005007900763303041, + "cy": 0.00887671671807766, + "sk": 0.00017038523219525814, + "te": 0.00014573821681551635, + "fa": 0.00026805358356796205, + "lv": 3.239181387471035e-05, + "bn": 0.0007696027751080692, + "sr": 4.031226126244292e-05, + "az": 2.9263566830195487e-05, + "sl": 0.00010334386752219871, + "kn": 1.471464474889217e-05, + "et": 3.0074781534494832e-05, + "mk": 2.0034123735968024e-05, + "br": 0.0027072455268353224, + "eu": 0.00014688125520478934, + "is": 
4.0947084926301613e-05, + "hy": 4.031226126244292e-05, + "ne": 3.407919211895205e-05, + "mn": 0.00015758057998027653, + "bs": 8.70242656674236e-05, + "kk": 1.0312683116353583e-05, + "sq": 3.054839180549607e-05, + "sw": 6.938178557902575e-05, + "gl": 0.00048160512233152986, + "mr": 4.462160723051056e-05, + "pa": 1.774924930941779e-05, + "si": 0.00014919429668225348, + "km": 0.0005900749238207936, + "sn": 0.00034418763243593276, + "yo": 0.00016643825802020729, + "so": 2.9546331461460795e-06, + "af": 6.12292205914855e-05, + "oc": 0.00019007847004104406, + "ka": 2.6231691663269885e-05, + "be": 6.027994822943583e-05, + "tg": 1.8432780279908911e-07, + "sd": 3.1150902941590175e-05, + "gu": 1.0353045581723563e-05, + "am": 5.14518842464895e-06, + "yi": 8.303913637064397e-05, + "lo": 1.093499486159999e-05, + "uz": 2.238000673671081e-09, + "fo": 0.00017172157822642475, + "ht": 7.102714153006673e-05, + "ps": 1.2151311238994822e-05, + "tk": 1.9753235491748455e-08, + "nn": 0.21674992144107819, + "mt": 1.3293573829287197e-05, + "sa": 0.0002498534449841827, + "lb": 1.793558652707361e-07, + "my": 0.0001025396486511454, + "bo": 4.4971580791752785e-05, + "tl": 0.00036638585152104497, + "mg": 2.4098989470644483e-08, + "as": 1.7474074411438778e-05, + "tt": 9.341277973362594e-08, + "haw": 0.0036429972387850285, + "ln": 6.182085598993581e-06, + "ha": 2.126728304574499e-08, + "ba": 1.7082484760067018e-08, + "jw": 0.0037003657780587673, + "su": 3.750336219354722e-08 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/gaenswein15.mp3.words.json b/tests/expected/tiny_auto/gaenswein15.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..db8e17d04c15d919cf201a5c61677c83cc259a24 --- /dev/null +++ b/tests/expected/tiny_auto/gaenswein15.mp3.words.json @@ -0,0 +1,438 @@ +{ + "text": " Wie wieder zu dazu ist Meshfuchs von 1962 als Meshale für die außerordentliche Form des grullischen Rätus ist dann nicht so weitergegangen wie sich Papstbälle dick 
das gewünscht hatte. Das hat er als Meshale im Rätus", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.92, + "end": 8.04, + "text": " Wie wieder zu dazu ist Meshfuchs von 1962 als Meshale für die außerordentliche Form des grullischen", + "tokens": [ + 50364, + 9233, + 6216, + 2164, + 13034, + 1418, + 376, + 14935, + 69, + 37503, + 2957, + 39498, + 3907, + 376, + 14935, + 1220, + 2959, + 978, + 39428, + 765, + 7698, + 68, + 10126, + 730, + 677, + 858, + 6282, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.6873348460477942, + "compression_ratio": 1.3293413173652695, + "no_speech_prob": 0.04554257169365883, + "confidence": 0.502, + "words": [ + { + "text": "Wie", + "start": 0.92, + "end": 1.1, + "confidence": 0.46 + }, + { + "text": "wieder", + "start": 1.1, + "end": 1.4, + "confidence": 0.89 + }, + { + "text": "zu", + "start": 1.4, + "end": 1.6, + "confidence": 0.279 + }, + { + "text": "dazu", + "start": 1.6, + "end": 1.92, + "confidence": 0.174 + }, + { + "text": "ist", + "start": 1.92, + "end": 2.14, + "confidence": 0.648 + }, + { + "text": "Meshfuchs", + "start": 2.14, + "end": 2.74, + "confidence": 0.332 + }, + { + "text": "von", + "start": 2.74, + "end": 3.22, + "confidence": 0.628 + }, + { + "text": "1962", + "start": 3.22, + "end": 4.84, + "confidence": 0.182 + }, + { + "text": "als", + "start": 4.84, + "end": 5.24, + "confidence": 0.882 + }, + { + "text": "Meshale", + "start": 5.24, + "end": 5.78, + "confidence": 0.388 + }, + { + "text": "für", + "start": 5.78, + "end": 5.92, + "confidence": 0.935 + }, + { + "text": "die", + "start": 5.92, + "end": 6.08, + "confidence": 0.973 + }, + { + "text": "außerordentliche", + "start": 6.08, + "end": 7.04, + "confidence": 0.899 + }, + { + "text": "Form", + "start": 7.04, + "end": 7.4, + "confidence": 0.642 + }, + { + "text": "des", + "start": 7.4, + "end": 7.62, + "confidence": 0.974 + }, + { + "text": "grullischen", + "start": 7.62, + "end": 8.04, + "confidence": 0.368 + } + ] + }, + { + "id": 1, + 
"seek": 0, + "start": 9.42, + "end": 12.78, + "text": " Rätus ist dann nicht so weitergegangen wie sich Papstbälle dick das gewünscht hatte.", + "tokens": [ + 50764, + 497, + 3628, + 301, + 1418, + 3594, + 1979, + 370, + 8988, + 432, + 47152, + 3355, + 3041, + 15919, + 372, + 65, + 31447, + 18659, + 1482, + 6906, + 3412, + 82, + 4701, + 13299, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.6873348460477942, + "compression_ratio": 1.3293413173652695, + "no_speech_prob": 0.04554257169365883, + "confidence": 0.629, + "words": [ + { + "text": "Rätus", + "start": 9.42, + "end": 9.44, + "confidence": 0.38 + }, + { + "text": "ist", + "start": 9.44, + "end": 9.62, + "confidence": 0.894 + }, + { + "text": "dann", + "start": 9.62, + "end": 9.78, + "confidence": 0.586 + }, + { + "text": "nicht", + "start": 9.78, + "end": 9.96, + "confidence": 0.979 + }, + { + "text": "so", + "start": 9.96, + "end": 10.12, + "confidence": 0.991 + }, + { + "text": "weitergegangen", + "start": 10.12, + "end": 10.88, + "confidence": 0.703 + }, + { + "text": "wie", + "start": 10.88, + "end": 11.04, + "confidence": 0.764 + }, + { + "text": "sich", + "start": 11.04, + "end": 11.24, + "confidence": 0.968 + }, + { + "text": "Papstbälle", + "start": 11.24, + "end": 11.72, + "confidence": 0.452 + }, + { + "text": "dick", + "start": 11.72, + "end": 11.9, + "confidence": 0.251 + }, + { + "text": "das", + "start": 11.9, + "end": 12.08, + "confidence": 0.832 + }, + { + "text": "gewünscht", + "start": 12.08, + "end": 12.58, + "confidence": 0.801 + }, + { + "text": "hatte.", + "start": 12.58, + "end": 12.78, + "confidence": 0.905 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 14.0, + "end": 15.28, + "text": " Das hat er als Meshale im Rätus", + "tokens": [ + 51014, + 2846, + 2385, + 1189, + 3907, + 376, + 14935, + 1220, + 566, + 497, + 3628, + 301, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.6873348460477942, + "compression_ratio": 1.3293413173652695, + "no_speech_prob": 
0.04554257169365883, + "confidence": 0.409, + "words": [ + { + "text": "Das", + "start": 14.0, + "end": 14.2, + "confidence": 0.937 + }, + { + "text": "hat", + "start": 14.2, + "end": 14.4, + "confidence": 0.955 + }, + { + "text": "er", + "start": 14.4, + "end": 14.56, + "confidence": 0.735 + }, + { + "text": "als", + "start": 14.56, + "end": 14.7, + "confidence": 0.831 + }, + { + "text": "Meshale", + "start": 14.7, + "end": 15.1, + "confidence": 0.188 + }, + { + "text": "im", + "start": 15.1, + "end": 15.12, + "confidence": 0.049 + }, + { + "text": "Rätus", + "start": 15.12, + "end": 15.28, + "confidence": 0.665 + } + ] + } + ], + "language": "de", + "language_probs": { + "en": 0.003948777448385954, + "zh": 0.00014956390077713877, + "de": 0.9891475439071655, + "es": 0.00012893213715869933, + "ru": 0.00023714249255135655, + "ko": 5.588794738287106e-05, + "fr": 0.0003396909451112151, + "ja": 4.6332737838383764e-05, + "pt": 0.00025047239614650607, + "tr": 0.00022277477546595037, + "pl": 0.0002974433300551027, + "ca": 6.430743155760865e-07, + "nl": 0.0004194623325020075, + "ar": 0.00012893213715869933, + "sv": 0.0003216129553038627, + "it": 0.00012207050167489797, + "id": 5.047548370384902e-07, + "hi": 3.234545874875039e-05, + "fi": 2.2580772565561347e-05, + "vi": 3.5727969134313753e-06, + "he": 5.588794738287106e-05, + "uk": 7.803705557307694e-06, + "el": 5.75409239900182e-06, + "ms": 3.079943553530029e-06, + "cs": 5.66488324693637e-06, + "ro": 9.41306643653661e-06, + "da": 0.0003971389669459313, + "hu": 3.062406904064119e-05, + "ta": 2.978914039886149e-07, + "no": 5.9959013015031815e-05, + "th": 3.985743205703329e-06, + "ur": 3.994131839135662e-05, + "hr": 2.3430975488736294e-06, + "bg": 2.14176452573156e-06, + "lt": 4.0400067291557207e-07, + "la": 0.0016590057639405131, + "mi": 1.2373242498142645e-05, + "ml": 6.845490361229167e-07, + "cy": 0.00010857175220735371, + "sk": 8.6873791360631e-07, + "te": 4.816404270968633e-07, + "fa": 1.1899231139977928e-05, + "lv": 
1.6680078260833398e-06, + "bn": 2.870823209377704e-06, + "sr": 1.942496055562515e-06, + "az": 5.038451490690932e-06, + "sl": 1.5042036466184072e-05, + "kn": 9.410256041064713e-08, + "et": 2.3157997475209413e-06, + "mk": 2.1289341134433926e-07, + "br": 9.214365127263591e-05, + "eu": 6.995197054493474e-06, + "is": 6.270454377954593e-06, + "hy": 1.0338229913031682e-05, + "ne": 6.609009801650245e-07, + "mn": 4.921735580865061e-06, + "bs": 3.079943553530029e-06, + "kk": 5.127035365148913e-07, + "sq": 1.8106053403244005e-06, + "sw": 1.1024859531971742e-06, + "gl": 4.921735580865061e-06, + "mr": 3.469125715582777e-07, + "pa": 3.0495573355437955e-07, + "si": 1.1806630027422216e-05, + "km": 4.561441164696589e-05, + "sn": 3.416361505514942e-05, + "yo": 5.620798674499383e-06, + "so": 7.300143067823228e-08, + "af": 1.3068748557998333e-05, + "oc": 1.391161003994057e-05, + "ka": 1.54265501350892e-06, + "be": 9.94217953120824e-06, + "tg": 1.757804923840922e-08, + "sd": 2.6037237148557324e-06, + "gu": 8.04901887363485e-08, + "am": 2.7017577508559043e-07, + "yi": 0.0002838224172592163, + "lo": 2.0235232511822687e-07, + "uz": 3.099218659485814e-10, + "fo": 1.7585924069862813e-05, + "ht": 7.988765901245642e-06, + "ps": 4.506953246163903e-07, + "tk": 1.6128115509772556e-09, + "nn": 0.0009676895570009947, + "mt": 1.035689592754352e-06, + "sa": 6.3196334849635605e-06, + "lb": 8.387269758713956e-07, + "my": 1.5010446077212691e-06, + "bo": 1.566948412801139e-06, + "tl": 6.506547265416884e-07, + "mg": 1.875467336986958e-09, + "as": 5.488954002430546e-08, + "tt": 4.150760180010593e-09, + "haw": 5.588794738287106e-05, + "ln": 4.4025495071764453e-07, + "ha": 2.0003279033176113e-09, + "ba": 9.598264405497048e-10, + "jw": 5.8114270359510556e-05, + "su": 8.31066104822753e-10 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/gloria.mp3.words.json b/tests/expected/tiny_auto/gloria.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..40d39b1b6acd6e54664997006cee4e3704a1704c --- /dev/null +++ b/tests/expected/tiny_auto/gloria.mp3.words.json @@ -0,0 +1,678 @@ +{ + "text": " Hello. You're my girlfriend. How are you? I'm okay. Why will be? I said she could stay with us, but she feels better. Oh, she can. There's more to be for long. What if you can stay as long as you want? My girlfriend. Really, Missia? I agree.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.34, + "end": 1.78, + "text": " Hello.", + "tokens": [ + 50364, + 2425, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.408, + "words": [ + { + "text": "Hello.", + "start": 1.34, + "end": 1.78, + "confidence": 0.408 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.96, + "end": 4.26, + "text": " You're my girlfriend.", + "tokens": [ + 50464, + 509, + 434, + 452, + 10369, + 13, + 50614 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.176, + "words": [ + { + "text": "You're", + "start": 1.96, + "end": 3.12, + "confidence": 0.193 + }, + { + "text": "my", + "start": 3.12, + "end": 3.2, + "confidence": 0.206 + }, + { + "text": "girlfriend.", + "start": 3.2, + "end": 4.26, + "confidence": 0.124 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 5.52, + "end": 6.54, + "text": " How are you?", + "tokens": [ + 50614, + 1012, + 366, + 291, + 30, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.627, + "words": [ + { + "text": "How", + "start": 5.52, + "end": 5.8, + "confidence": 0.488 + }, + { + "text": "are", + "start": 5.8, + "end": 6.16, + "confidence": 0.525 + }, + { + "text": "you?", + "start": 6.16, + "end": 6.54, + 
"confidence": 0.96 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 6.76, + "end": 7.8, + "text": " I'm okay.", + "tokens": [ + 50714, + 286, + 478, + 1392, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.836, + "words": [ + { + "text": "I'm", + "start": 6.76, + "end": 7.1, + "confidence": 0.838 + }, + { + "text": "okay.", + "start": 7.1, + "end": 7.8, + "confidence": 0.83 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 8.38, + "end": 9.14, + "text": " Why will be?", + "tokens": [ + 50764, + 1545, + 486, + 312, + 30, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.383, + "words": [ + { + "text": "Why", + "start": 8.38, + "end": 8.66, + "confidence": 0.344 + }, + { + "text": "will", + "start": 8.66, + "end": 8.96, + "confidence": 0.522 + }, + { + "text": "be?", + "start": 8.96, + "end": 9.14, + "confidence": 0.313 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.4, + "end": 11.52, + "text": " I said she could stay with us, but she feels better.", + "tokens": [ + 50814, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 11, + 457, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.692, + "words": [ + { + "text": "I", + "start": 9.4, + "end": 9.54, + "confidence": 0.285 + }, + { + "text": "said", + "start": 9.54, + "end": 9.66, + "confidence": 0.621 + }, + { + "text": "she", + "start": 9.66, + "end": 9.82, + "confidence": 0.845 + }, + { + "text": "could", + "start": 9.82, + "end": 9.98, + "confidence": 0.911 + }, + { + "text": "stay", + "start": 9.98, + "end": 10.14, + "confidence": 0.992 + }, + { + "text": "with", + "start": 10.14, + "end": 10.26, + 
"confidence": 0.986 + }, + { + "text": "us,", + "start": 10.26, + "end": 10.44, + "confidence": 0.781 + }, + { + "text": "but", + "start": 10.64, + "end": 10.72, + "confidence": 0.228 + }, + { + "text": "she", + "start": 10.72, + "end": 10.94, + "confidence": 0.971 + }, + { + "text": "feels", + "start": 10.94, + "end": 11.24, + "confidence": 0.76 + }, + { + "text": "better.", + "start": 11.24, + "end": 11.52, + "confidence": 0.987 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 12.18, + "end": 13.44, + "text": " Oh, she can.", + "tokens": [ + 50964, + 876, + 11, + 750, + 393, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.286, + "words": [ + { + "text": "Oh,", + "start": 12.18, + "end": 12.6, + "confidence": 0.18 + }, + { + "text": "she", + "start": 12.8, + "end": 12.96, + "confidence": 0.686 + }, + { + "text": "can.", + "start": 12.96, + "end": 13.44, + "confidence": 0.19 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.5, + "end": 15.2, + "text": " There's more to be for long.", + "tokens": [ + 51064, + 821, + 311, + 544, + 281, + 312, + 337, + 938, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.683, + "words": [ + { + "text": "There's", + "start": 13.5, + "end": 14.44, + "confidence": 0.519 + }, + { + "text": "more", + "start": 14.44, + "end": 14.56, + "confidence": 0.811 + }, + { + "text": "to", + "start": 14.56, + "end": 14.74, + "confidence": 0.418 + }, + { + "text": "be", + "start": 14.74, + "end": 14.8, + "confidence": 0.95 + }, + { + "text": "for", + "start": 14.8, + "end": 15.0, + "confidence": 0.987 + }, + { + "text": "long.", + "start": 15.0, + "end": 15.2, + "confidence": 0.813 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.38, + "end": 16.86, + "text": " What if you can 
stay as long as you want?", + "tokens": [ + 51114, + 708, + 498, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 30, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.771, + "words": [ + { + "text": "What", + "start": 15.38, + "end": 15.54, + "confidence": 0.387 + }, + { + "text": "if", + "start": 15.54, + "end": 15.62, + "confidence": 0.918 + }, + { + "text": "you", + "start": 15.62, + "end": 15.7, + "confidence": 0.97 + }, + { + "text": "can", + "start": 15.7, + "end": 15.98, + "confidence": 0.946 + }, + { + "text": "stay", + "start": 15.98, + "end": 16.12, + "confidence": 0.959 + }, + { + "text": "as", + "start": 16.12, + "end": 16.24, + "confidence": 0.307 + }, + { + "text": "long", + "start": 16.24, + "end": 16.36, + "confidence": 0.993 + }, + { + "text": "as", + "start": 16.36, + "end": 16.54, + "confidence": 0.992 + }, + { + "text": "you", + "start": 16.54, + "end": 16.62, + "confidence": 0.96 + }, + { + "text": "want?", + "start": 16.62, + "end": 16.86, + "confidence": 0.82 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 16.86, + "end": 17.74, + "text": " My girlfriend.", + "tokens": [ + 51214, + 1222, + 10369, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.141, + "words": [ + { + "text": "My", + "start": 16.86, + "end": 17.34, + "confidence": 0.124 + }, + { + "text": "girlfriend.", + "start": 17.34, + "end": 17.74, + "confidence": 0.159 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 17.98, + "end": 19.1, + "text": " Really, Missia?", + "tokens": [ + 51264, + 4083, + 11, + 5275, + 654, + 30, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.387, + "words": [ + 
{ + "text": "Really,", + "start": 17.98, + "end": 18.34, + "confidence": 0.704 + }, + { + "text": "Missia?", + "start": 18.7, + "end": 19.1, + "confidence": 0.287 + } + ] + }, + { + "id": 11, + "seek": 0, + "start": 19.72, + "end": 20.22, + "text": " I agree.", + "tokens": [ + 51314, + 286, + 3986, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.5517418488212253, + "compression_ratio": 1.4345238095238095, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.165, + "words": [ + { + "text": "I", + "start": 19.72, + "end": 20.06, + "confidence": 0.129 + }, + { + "text": "agree.", + "start": 20.06, + "end": 20.22, + "confidence": 0.211 + } + ] + } + ], + "language": "en", + "language_probs": { + "en": 0.8443762063980103, + "zh": 0.0022106480319052935, + "de": 0.003940905444324017, + "es": 0.005020844284445047, + "ru": 0.006651538424193859, + "ko": 0.00397181510925293, + "fr": 0.004679941106587648, + "ja": 0.0019056976307183504, + "pt": 0.0026457991916686296, + "tr": 0.0016556988703086972, + "pl": 0.0017082561971619725, + "ca": 0.0010121084051206708, + "nl": 0.005386579316109419, + "ar": 0.0022808213252574205, + "sv": 0.0031914429273456335, + "it": 0.001183274551294744, + "id": 0.00030389000312425196, + "hi": 0.0005250748945400119, + "fi": 0.0010280467104166746, + "vi": 0.0002745417586993426, + "he": 0.00021381332771852612, + "uk": 0.000865702168084681, + "el": 0.0014161941362544894, + "ms": 0.0030932524241507053, + "cs": 0.0003665613476186991, + "ro": 0.0002008590381592512, + "da": 0.0017216543201357126, + "hu": 0.0004121360252611339, + "ta": 0.0002579081046860665, + "no": 0.0005049595492891967, + "th": 0.0001601387921255082, + "ur": 0.0008132518851198256, + "hr": 7.804541382938623e-05, + "bg": 8.179088763426989e-05, + "lt": 0.00011716003791661933, + "la": 0.004261135123670101, + "mi": 0.003673328086733818, + "ml": 0.00032348925014957786, + "cy": 0.06770503520965576, + "sk": 4.4295567931840196e-05, + "te": 0.00010461213969392702, + "fa": 
9.088860679185018e-05, + "lv": 0.0001247162144863978, + "bn": 0.00011716003791661933, + "sr": 4.588053343468346e-05, + "az": 2.771945764834527e-05, + "sl": 0.00023666980268899351, + "kn": 7.146808911784319e-06, + "et": 0.0001042043004417792, + "mk": 2.262396264995914e-05, + "br": 0.0006483809556812048, + "eu": 4.446893581189215e-05, + "is": 0.000752135063521564, + "hy": 1.124350728787249e-05, + "ne": 8.740671910345554e-05, + "mn": 4.210234328638762e-05, + "bs": 0.00017315131844952703, + "kk": 1.1156010259583127e-05, + "sq": 4.922262451145798e-05, + "sw": 0.0002968504268210381, + "gl": 0.0005502737476490438, + "mr": 1.3509384189092088e-05, + "pa": 8.587087904743385e-06, + "si": 3.3962591260205954e-05, + "km": 0.0001786477369023487, + "sn": 0.0002559010754339397, + "yo": 0.0001691402867436409, + "so": 2.7661242256726837e-06, + "af": 0.000508919998537749, + "oc": 7.360383460763842e-05, + "ka": 7.788150469423272e-06, + "be": 9.018131095217541e-05, + "tg": 3.4419832672938355e-07, + "sd": 6.320310785667971e-05, + "gu": 3.510393980832305e-06, + "am": 8.161911864590365e-06, + "yi": 0.00012278267240617424, + "lo": 3.0979122129792813e-06, + "uz": 2.5671094139312345e-09, + "fo": 0.0002499731199350208, + "ht": 0.00021049848874099553, + "ps": 1.2690892617683858e-05, + "tk": 4.414082255266294e-08, + "nn": 0.007898896001279354, + "mt": 4.177469963906333e-05, + "sa": 0.0001042043004417792, + "lb": 5.532596674129309e-07, + "my": 2.5636309146648273e-05, + "bo": 5.491180854733102e-05, + "tl": 0.0005502737476490438, + "mg": 8.147531360691573e-08, + "as": 4.4375542529451195e-06, + "tt": 7.103994335011521e-07, + "haw": 0.004535954911261797, + "ln": 9.806761227082461e-06, + "ha": 1.4645611656760593e-07, + "ba": 4.5977703422295235e-08, + "jw": 0.0009433886734768748, + "su": 8.766671300008966e-08 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/japanese.mp3.words.json b/tests/expected/tiny_auto/japanese.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..cd2096c7aedd0c9449d0c8d89d44a4ea7546ef32 --- /dev/null +++ b/tests/expected/tiny_auto/japanese.mp3.words.json @@ -0,0 +1,1619 @@ +{ + "text": "いきます 入室タブでの最図免証記が実際と違う県に関するご質問いただいております同じ度面でデレクトリーごとに 別再度として管理上をしている際と針ます サプテレクトリーごとにわけたサイトはそれぞれ パブリシャーセンターに登録していくグルニュース上では 別再度として認識され パンコン愛行は最図ごと物物は正しく表示されますしかし グルニュースタブでは バブコン最図免証記ともに正しくない以上 時にいいなります こちらいたしても ではありませんパブコンはサプテレクトリーごとに 設定した", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.08, + "end": 6.62, + "text": "いきます 入室タブでの最図免証記が実際と違う県に関するご質問いただいております", + "tokens": [ + 50364, + 47348, + 220, + 14028, + 2415, + 97, + 12144, + 28889, + 2474, + 2972, + 8661, + 3919, + 111, + 2347, + 235, + 5396, + 120, + 16958, + 5142, + 33197, + 34837, + 3193, + 49806, + 2862, + 234, + 4108, + 5196, + 95, + 22570, + 9991, + 43450, + 11361, + 32418, + 18549, + 6117, + 19420, + 50712 + ], + "temperature": 0.0, + "avg_logprob": -0.568543427909901, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.18977835774421692, + "confidence": 0.55, + "words": [ + { + "text": "いきます", + "start": 0.08, + "end": 0.6, + "confidence": 0.153 + }, + { + "text": " ", + "start": 0.6, + "end": 1.18, + "confidence": 0.304 + }, + { + "text": "入", + "start": 1.18, + "end": 1.38, + "confidence": 0.436 + }, + { + "text": "室", + "start": 1.38, + "end": 1.52, + "confidence": 0.5 + }, + { + "text": "タ", + "start": 1.52, + "end": 1.68, + "confidence": 0.2 + }, + { + "text": "ブ", + "start": 1.68, + "end": 1.78, + "confidence": 0.839 + }, + { + "text": "で", + "start": 1.78, + "end": 1.98, + "confidence": 0.947 + }, + { + "text": "の", + "start": 1.98, + "end": 2.22, + "confidence": 0.905 + }, + { + "text": "最", + "start": 2.22, + "end": 2.64, + "confidence": 0.275 + }, + { + "text": "図", + "start": 2.64, + "end": 2.78, + "confidence": 0.347 + }, + { + "text": "免", + "start": 2.78, + "end": 2.98, + "confidence": 0.647 + }, + { + "text": "証", + "start": 2.98, + "end": 3.18, + "confidence": 0.283 + }, + { + 
"text": "記", + "start": 3.18, + "end": 3.32, + "confidence": 0.296 + }, + { + "text": "が", + "start": 3.32, + "end": 3.5, + "confidence": 0.975 + }, + { + "text": "実", + "start": 3.5, + "end": 3.74, + "confidence": 0.342 + }, + { + "text": "際", + "start": 3.74, + "end": 3.9, + "confidence": 0.975 + }, + { + "text": "と", + "start": 3.9, + "end": 4.08, + "confidence": 0.909 + }, + { + "text": "違う", + "start": 4.08, + "end": 4.38, + "confidence": 0.799 + }, + { + "text": "県", + "start": 4.38, + "end": 4.6, + "confidence": 0.404 + }, + { + "text": "に", + "start": 4.6, + "end": 4.74, + "confidence": 0.988 + }, + { + "text": "関", + "start": 4.74, + "end": 4.94, + "confidence": 0.544 + }, + { + "text": "する", + "start": 4.94, + "end": 5.12, + "confidence": 0.946 + }, + { + "text": "ご", + "start": 5.12, + "end": 5.3, + "confidence": 0.498 + }, + { + "text": "質", + "start": 5.3, + "end": 5.46, + "confidence": 0.943 + }, + { + "text": "問", + "start": 5.46, + "end": 5.6, + "confidence": 0.99 + }, + { + "text": "いただ", + "start": 5.6, + "end": 5.92, + "confidence": 0.923 + }, + { + "text": "いて", + "start": 5.92, + "end": 6.16, + "confidence": 0.982 + }, + { + "text": "お", + "start": 6.16, + "end": 6.26, + "confidence": 0.791 + }, + { + "text": "ります", + "start": 6.26, + "end": 6.62, + "confidence": 0.976 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.92, + "end": 12.9, + "text": "同じ度面でデレクトリーごとに 別再度として管理上をしている際と針", + "tokens": [ + 50712, + 13089, + 9257, + 13127, + 8833, + 2474, + 31327, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 220, + 16158, + 8623, + 13127, + 3193, + 8822, + 23131, + 13876, + 5708, + 5998, + 8822, + 22979, + 34837, + 3193, + 5873, + 251, + 51010 + ], + "temperature": 0.0, + "avg_logprob": -0.568543427909901, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.18977835774421692, + "confidence": 0.524, + "words": [ + { + "text": "同", + "start": 6.92, + "end": 7.3, + "confidence": 0.834 + }, + { + "text": "じ", + 
"start": 7.3, + "end": 7.42, + "confidence": 0.229 + }, + { + "text": "度", + "start": 7.42, + "end": 7.6, + "confidence": 0.137 + }, + { + "text": "面", + "start": 7.6, + "end": 7.76, + "confidence": 0.294 + }, + { + "text": "で", + "start": 7.76, + "end": 8.0, + "confidence": 0.571 + }, + { + "text": "デ", + "start": 8.0, + "end": 8.16, + "confidence": 0.514 + }, + { + "text": "レ", + "start": 8.16, + "end": 8.28, + "confidence": 0.478 + }, + { + "text": "ク", + "start": 8.28, + "end": 8.36, + "confidence": 0.98 + }, + { + "text": "ト", + "start": 8.36, + "end": 8.5, + "confidence": 0.984 + }, + { + "text": "リ", + "start": 8.5, + "end": 8.62, + "confidence": 0.967 + }, + { + "text": "ー", + "start": 8.62, + "end": 8.7, + "confidence": 0.643 + }, + { + "text": "ご", + "start": 8.7, + "end": 8.84, + "confidence": 0.228 + }, + { + "text": "と", + "start": 8.84, + "end": 8.98, + "confidence": 0.988 + }, + { + "text": "に", + "start": 8.98, + "end": 9.14, + "confidence": 0.988 + }, + { + "text": " ", + "start": 9.14, + "end": 9.38, + "confidence": 0.258 + }, + { + "text": "別", + "start": 9.38, + "end": 9.42, + "confidence": 0.401 + }, + { + "text": "再", + "start": 9.42, + "end": 9.58, + "confidence": 0.432 + }, + { + "text": "度", + "start": 9.58, + "end": 9.74, + "confidence": 0.325 + }, + { + "text": "と", + "start": 9.74, + "end": 9.9, + "confidence": 0.885 + }, + { + "text": "して", + "start": 9.9, + "end": 10.3, + "confidence": 0.998 + }, + { + "text": "管", + "start": 10.3, + "end": 10.88, + "confidence": 0.819 + }, + { + "text": "理", + "start": 10.88, + "end": 11.06, + "confidence": 1.0 + }, + { + "text": "上", + "start": 11.06, + "end": 11.38, + "confidence": 0.469 + }, + { + "text": "を", + "start": 11.38, + "end": 11.54, + "confidence": 0.966 + }, + { + "text": "して", + "start": 11.54, + "end": 11.84, + "confidence": 0.916 + }, + { + "text": "いる", + "start": 11.84, + "end": 12.16, + "confidence": 0.979 + }, + { + "text": "際", + "start": 12.16, + "end": 12.54, + "confidence": 
0.121 + }, + { + "text": "と", + "start": 12.54, + "end": 12.7, + "confidence": 0.714 + }, + { + "text": "針", + "start": 12.7, + "end": 12.9, + "confidence": 0.266 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.9, + "end": 19.55, + "text": "ます サプテレクトリーごとにわけたサイトはそれぞれ パブリシャーセンターに登録していく", + "tokens": [ + 51010, + 5368, + 220, + 23607, + 20953, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 9206, + 7625, + 3368, + 23607, + 8040, + 7588, + 3065, + 13873, + 31563, + 4132, + 15096, + 239, + 28889, + 12376, + 11054, + 17233, + 3384, + 31223, + 4824, + 30736, + 4108, + 46246, + 8822, + 49394, + 51340 + ], + "temperature": 0.0, + "avg_logprob": -0.568543427909901, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.18977835774421692, + "confidence": 0.715, + "words": [ + { + "text": "ます", + "start": 12.9, + "end": 13.22, + "confidence": 0.343 + }, + { + "text": " ", + "start": 13.22, + "end": 14.1, + "confidence": 0.339 + }, + { + "text": "サ", + "start": 14.1, + "end": 14.22, + "confidence": 0.553 + }, + { + "text": "プ", + "start": 14.22, + "end": 14.32, + "confidence": 0.785 + }, + { + "text": "テ", + "start": 14.32, + "end": 14.42, + "confidence": 0.619 + }, + { + "text": "レ", + "start": 14.42, + "end": 14.58, + "confidence": 0.865 + }, + { + "text": "ク", + "start": 14.58, + "end": 14.68, + "confidence": 0.996 + }, + { + "text": "ト", + "start": 14.68, + "end": 14.78, + "confidence": 0.999 + }, + { + "text": "リ", + "start": 14.78, + "end": 14.92, + "confidence": 0.996 + }, + { + "text": "ー", + "start": 14.92, + "end": 15.04, + "confidence": 0.996 + }, + { + "text": "ご", + "start": 15.04, + "end": 15.1, + "confidence": 0.965 + }, + { + "text": "と", + "start": 15.1, + "end": 15.38, + "confidence": 0.998 + }, + { + "text": "に", + "start": 15.38, + "end": 15.84, + "confidence": 0.622 + }, + { + "text": "わ", + "start": 15.84, + "end": 16.02, + "confidence": 0.734 + }, + { + "text": "け", + "start": 16.02, + "end": 16.14, + 
"confidence": 0.992 + }, + { + "text": "た", + "start": 16.14, + "end": 16.28, + "confidence": 0.885 + }, + { + "text": "サ", + "start": 16.28, + "end": 16.42, + "confidence": 0.304 + }, + { + "text": "イ", + "start": 16.42, + "end": 16.5, + "confidence": 0.976 + }, + { + "text": "ト", + "start": 16.5, + "end": 16.6, + "confidence": 0.933 + }, + { + "text": "は", + "start": 16.6, + "end": 17.08, + "confidence": 0.894 + }, + { + "text": "それ", + "start": 17.08, + "end": 17.46, + "confidence": 0.476 + }, + { + "text": "ぞ", + "start": 17.46, + "end": 17.64, + "confidence": 0.921 + }, + { + "text": "れ", + "start": 17.64, + "end": 17.72, + "confidence": 0.998 + }, + { + "text": " パ", + "start": 17.72, + "end": 17.86, + "confidence": 0.503 + }, + { + "text": "ブ", + "start": 17.86, + "end": 17.94, + "confidence": 0.96 + }, + { + "text": "リ", + "start": 17.94, + "end": 18.06, + "confidence": 0.988 + }, + { + "text": "シ", + "start": 18.06, + "end": 18.18, + "confidence": 0.544 + }, + { + "text": "ャ", + "start": 18.18, + "end": 18.26, + "confidence": 0.911 + }, + { + "text": "ー", + "start": 18.26, + "end": 18.42, + "confidence": 0.543 + }, + { + "text": "セ", + "start": 18.42, + "end": 18.44, + "confidence": 0.368 + }, + { + "text": "ン", + "start": 18.44, + "end": 18.48, + "confidence": 0.916 + }, + { + "text": "ター", + "start": 18.48, + "end": 18.6, + "confidence": 0.996 + }, + { + "text": "に", + "start": 18.6, + "end": 18.76, + "confidence": 0.703 + }, + { + "text": "登録", + "start": 18.76, + "end": 19.12, + "confidence": 0.672 + }, + { + "text": "して", + "start": 19.12, + "end": 19.38, + "confidence": 0.438 + }, + { + "text": "いく", + "start": 19.38, + "end": 19.55, + "confidence": 0.81 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 19.55, + "end": 27.08, + "text": "グルニュース上では 別再度として認識され パンコン愛行は最図ごと物物は正しく表示されます", + "tokens": [ + 51340, + 23839, + 9405, + 34737, + 26167, + 3384, + 9550, + 5708, + 16719, + 220, + 16158, + 8623, + 13127, + 3193, + 8822, + 22041, + 43143, + 6722, + 
4132, + 220, + 23268, + 1047, + 111, + 18066, + 4824, + 15157, + 8082, + 3065, + 8661, + 3919, + 111, + 9991, + 3193, + 23516, + 23516, + 3065, + 15789, + 26568, + 40053, + 6722, + 4132, + 5368, + 51727 + ], + "temperature": 0.0, + "avg_logprob": -0.568543427909901, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.18977835774421692, + "confidence": 0.554, + "words": [ + { + "text": "グ", + "start": 19.55, + "end": 19.78, + "confidence": 0.699 + }, + { + "text": "ル", + "start": 19.78, + "end": 19.88, + "confidence": 0.976 + }, + { + "text": "ニ", + "start": 19.88, + "end": 20.02, + "confidence": 0.898 + }, + { + "text": "ュ", + "start": 20.02, + "end": 20.14, + "confidence": 0.996 + }, + { + "text": "ー", + "start": 20.14, + "end": 20.18, + "confidence": 0.994 + }, + { + "text": "ス", + "start": 20.18, + "end": 20.22, + "confidence": 0.958 + }, + { + "text": "上", + "start": 20.22, + "end": 20.42, + "confidence": 0.311 + }, + { + "text": "では", + "start": 20.42, + "end": 20.82, + "confidence": 0.786 + }, + { + "text": " ", + "start": 20.82, + "end": 21.1, + "confidence": 0.598 + }, + { + "text": "別", + "start": 21.1, + "end": 21.16, + "confidence": 0.288 + }, + { + "text": "再", + "start": 21.16, + "end": 21.34, + "confidence": 0.839 + }, + { + "text": "度", + "start": 21.34, + "end": 21.46, + "confidence": 1.0 + }, + { + "text": "と", + "start": 21.46, + "end": 21.64, + "confidence": 0.991 + }, + { + "text": "して", + "start": 21.64, + "end": 21.82, + "confidence": 0.994 + }, + { + "text": "認", + "start": 21.82, + "end": 22.04, + "confidence": 0.497 + }, + { + "text": "識", + "start": 22.04, + "end": 22.2, + "confidence": 0.99 + }, + { + "text": "さ", + "start": 22.2, + "end": 22.34, + "confidence": 0.994 + }, + { + "text": "れ", + "start": 22.34, + "end": 22.7, + "confidence": 0.958 + }, + { + "text": " ", + "start": 22.7, + "end": 22.76, + "confidence": 0.202 + }, + { + "text": "パ", + "start": 22.76, + "end": 22.9, + "confidence": 0.04 + }, + { + "text": "ン", + 
"start": 22.9, + "end": 23.0, + "confidence": 0.332 + }, + { + "text": "コ", + "start": 23.0, + "end": 23.24, + "confidence": 0.919 + }, + { + "text": "ン", + "start": 23.24, + "end": 23.4, + "confidence": 0.808 + }, + { + "text": "愛", + "start": 23.4, + "end": 23.48, + "confidence": 0.245 + }, + { + "text": "行", + "start": 23.48, + "end": 23.64, + "confidence": 0.133 + }, + { + "text": "は", + "start": 23.64, + "end": 23.86, + "confidence": 0.959 + }, + { + "text": "最", + "start": 23.86, + "end": 24.1, + "confidence": 0.723 + }, + { + "text": "図", + "start": 24.1, + "end": 24.3, + "confidence": 0.948 + }, + { + "text": "ご", + "start": 24.3, + "end": 24.42, + "confidence": 0.154 + }, + { + "text": "と", + "start": 24.42, + "end": 24.52, + "confidence": 0.969 + }, + { + "text": "物", + "start": 24.52, + "end": 24.76, + "confidence": 0.238 + }, + { + "text": "物", + "start": 24.76, + "end": 24.92, + "confidence": 0.194 + }, + { + "text": "は", + "start": 24.92, + "end": 25.08, + "confidence": 0.567 + }, + { + "text": "正", + "start": 25.08, + "end": 25.36, + "confidence": 0.454 + }, + { + "text": "しく", + "start": 25.36, + "end": 25.64, + "confidence": 0.955 + }, + { + "text": "表示", + "start": 25.64, + "end": 26.0, + "confidence": 0.422 + }, + { + "text": "さ", + "start": 26.0, + "end": 26.28, + "confidence": 0.71 + }, + { + "text": "れ", + "start": 26.28, + "end": 26.62, + "confidence": 0.976 + }, + { + "text": "ます", + "start": 26.62, + "end": 27.08, + "confidence": 0.474 + } + ] + }, + { + "id": 4, + "seek": 2726, + "start": 27.36, + "end": 32.39, + "text": "しかし グルニュースタブでは バブコン最図免証記ともに正しくない", + "tokens": [ + 50364, + 32156, + 2849, + 220, + 23839, + 9405, + 34737, + 26167, + 3384, + 9550, + 12144, + 28889, + 16719, + 15096, + 238, + 28889, + 18066, + 4824, + 8661, + 3919, + 111, + 2347, + 235, + 5396, + 120, + 16958, + 3193, + 4801, + 4108, + 15789, + 26568, + 9311, + 50620 + ], + "temperature": 0.0, + "avg_logprob": -0.44633566366659627, + "compression_ratio": 1.375, + 
"no_speech_prob": 0.35541412234306335, + "confidence": 0.806, + "words": [ + { + "text": "しか", + "start": 27.36, + "end": 27.58, + "confidence": 0.958 + }, + { + "text": "し", + "start": 27.58, + "end": 27.74, + "confidence": 0.992 + }, + { + "text": " ", + "start": 27.74, + "end": 27.84, + "confidence": 0.249 + }, + { + "text": "グ", + "start": 27.84, + "end": 27.92, + "confidence": 0.445 + }, + { + "text": "ル", + "start": 27.92, + "end": 28.04, + "confidence": 0.995 + }, + { + "text": "ニ", + "start": 28.04, + "end": 28.24, + "confidence": 0.885 + }, + { + "text": "ュ", + "start": 28.24, + "end": 28.7, + "confidence": 0.994 + }, + { + "text": "ー", + "start": 28.7, + "end": 29.18, + "confidence": 0.999 + }, + { + "text": "ス", + "start": 29.18, + "end": 29.2, + "confidence": 0.968 + }, + { + "text": "タ", + "start": 29.2, + "end": 29.34, + "confidence": 0.992 + }, + { + "text": "ブ", + "start": 29.34, + "end": 29.44, + "confidence": 0.996 + }, + { + "text": "では", + "start": 29.44, + "end": 29.7, + "confidence": 0.979 + }, + { + "text": " バ", + "start": 29.7, + "end": 30.06, + "confidence": 0.502 + }, + { + "text": "ブ", + "start": 30.06, + "end": 30.16, + "confidence": 0.629 + }, + { + "text": "コ", + "start": 30.16, + "end": 30.34, + "confidence": 0.636 + }, + { + "text": "ン", + "start": 30.34, + "end": 30.42, + "confidence": 0.982 + }, + { + "text": "最", + "start": 30.42, + "end": 30.6, + "confidence": 0.66 + }, + { + "text": "図", + "start": 30.6, + "end": 30.88, + "confidence": 0.957 + }, + { + "text": "免", + "start": 30.88, + "end": 30.94, + "confidence": 0.977 + }, + { + "text": "証", + "start": 30.94, + "end": 31.12, + "confidence": 0.997 + }, + { + "text": "記", + "start": 31.12, + "end": 31.26, + "confidence": 0.994 + }, + { + "text": "と", + "start": 31.26, + "end": 31.42, + "confidence": 0.726 + }, + { + "text": "も", + "start": 31.42, + "end": 31.54, + "confidence": 0.836 + }, + { + "text": "に", + "start": 31.54, + "end": 31.66, + "confidence": 0.946 + }, + { + 
"text": "正", + "start": 31.66, + "end": 31.96, + "confidence": 0.482 + }, + { + "text": "しく", + "start": 31.96, + "end": 32.18, + "confidence": 0.999 + }, + { + "text": "ない", + "start": 32.18, + "end": 32.39, + "confidence": 0.937 + } + ] + }, + { + "id": 5, + "seek": 2726, + "start": 32.39, + "end": 36.34, + "text": "以上 時にいいなります こちらいたしても ではありません", + "tokens": [ + 50620, + 29497, + 220, + 6611, + 4108, + 13806, + 3203, + 19420, + 14384, + 28567, + 17679, + 8822, + 4801, + 220, + 16719, + 14498, + 30250, + 50838 + ], + "temperature": 0.0, + "avg_logprob": -0.44633566366659627, + "compression_ratio": 1.375, + "no_speech_prob": 0.35541412234306335, + "confidence": 0.456, + "words": [ + { + "text": "以上", + "start": 32.39, + "end": 32.74, + "confidence": 0.31 + }, + { + "text": " ", + "start": 32.74, + "end": 33.04, + "confidence": 0.126 + }, + { + "text": "時", + "start": 33.04, + "end": 33.1, + "confidence": 0.35 + }, + { + "text": "に", + "start": 33.1, + "end": 33.12, + "confidence": 0.786 + }, + { + "text": "いい", + "start": 33.12, + "end": 33.36, + "confidence": 0.613 + }, + { + "text": "な", + "start": 33.36, + "end": 33.48, + "confidence": 0.793 + }, + { + "text": "ります", + "start": 33.48, + "end": 34.02, + "confidence": 0.895 + }, + { + "text": " こ", + "start": 34.02, + "end": 34.38, + "confidence": 0.329 + }, + { + "text": "ちら", + "start": 34.38, + "end": 34.56, + "confidence": 0.683 + }, + { + "text": "いた", + "start": 34.56, + "end": 34.84, + "confidence": 0.572 + }, + { + "text": "して", + "start": 34.84, + "end": 35.04, + "confidence": 0.44 + }, + { + "text": "も", + "start": 35.04, + "end": 35.18, + "confidence": 0.419 + }, + { + "text": " ", + "start": 35.18, + "end": 35.38, + "confidence": 0.247 + }, + { + "text": "では", + "start": 35.38, + "end": 35.46, + "confidence": 0.141 + }, + { + "text": "あり", + "start": 35.46, + "end": 35.72, + "confidence": 0.906 + }, + { + "text": "ません", + "start": 35.72, + "end": 36.34, + "confidence": 0.997 + } + ] + }, + { + "id": 6, 
+ "seek": 2726, + "start": 36.42, + "end": 40.4, + "text": "パブコンはサプテレクトリーごとに 設定した", + "tokens": [ + 50838, + 23268, + 28889, + 18066, + 4824, + 3065, + 23607, + 20953, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 220, + 39035, + 12088, + 8533, + 51042 + ], + "temperature": 0.0, + "avg_logprob": -0.44633566366659627, + "compression_ratio": 1.375, + "no_speech_prob": 0.35541412234306335, + "confidence": 0.848, + "words": [ + { + "text": "パ", + "start": 36.42, + "end": 36.92, + "confidence": 0.477 + }, + { + "text": "ブ", + "start": 36.92, + "end": 37.04, + "confidence": 0.649 + }, + { + "text": "コ", + "start": 37.04, + "end": 37.2, + "confidence": 0.678 + }, + { + "text": "ン", + "start": 37.2, + "end": 37.72, + "confidence": 0.994 + }, + { + "text": "は", + "start": 37.72, + "end": 37.74, + "confidence": 0.975 + }, + { + "text": "サ", + "start": 37.74, + "end": 38.22, + "confidence": 0.667 + }, + { + "text": "プ", + "start": 38.22, + "end": 38.32, + "confidence": 0.572 + }, + { + "text": "テ", + "start": 38.32, + "end": 38.42, + "confidence": 0.984 + }, + { + "text": "レ", + "start": 38.42, + "end": 38.56, + "confidence": 0.997 + }, + { + "text": "ク", + "start": 38.56, + "end": 38.68, + "confidence": 0.997 + }, + { + "text": "ト", + "start": 38.68, + "end": 38.78, + "confidence": 0.998 + }, + { + "text": "リ", + "start": 38.78, + "end": 38.92, + "confidence": 0.999 + }, + { + "text": "ー", + "start": 38.92, + "end": 38.96, + "confidence": 0.987 + }, + { + "text": "ご", + "start": 38.96, + "end": 39.08, + "confidence": 0.959 + }, + { + "text": "と", + "start": 39.08, + "end": 39.26, + "confidence": 0.999 + }, + { + "text": "に", + "start": 39.26, + "end": 39.58, + "confidence": 0.974 + }, + { + "text": " ", + "start": 39.58, + "end": 39.66, + "confidence": 0.666 + }, + { + "text": "設", + "start": 39.66, + "end": 39.82, + "confidence": 0.912 + }, + { + "text": "定", + "start": 39.82, + "end": 40.02, + "confidence": 0.999 + }, + { + "text": "した", + 
"start": 40.02, + "end": 40.4, + "confidence": 0.873 + } + ] + } + ], + "language": "ja", + "language_probs": { + "en": 0.006242942530661821, + "zh": 0.0014598396373912692, + "de": 0.00011257075675530359, + "es": 0.0013291992945596576, + "ru": 0.00022562954109162092, + "ko": 0.00042483757715672255, + "fr": 0.0003181886568199843, + "ja": 0.986291229724884, + "pt": 0.0003494618576951325, + "tr": 8.698792953509837e-05, + "pl": 0.0008717101882211864, + "ca": 1.300364374401397e-06, + "nl": 1.030837756843539e-05, + "ar": 1.6217354641412385e-05, + "sv": 1.837667150539346e-05, + "it": 0.00015030168287921697, + "id": 1.4651158380729612e-05, + "hi": 3.514630225254223e-05, + "fi": 3.327585000079125e-05, + "vi": 9.459498869546223e-06, + "he": 1.4766068488825113e-05, + "uk": 2.3967695597093552e-05, + "el": 3.6833007470704615e-05, + "ms": 3.487279172986746e-05, + "cs": 1.2630088349396829e-05, + "ro": 2.0182824300718494e-05, + "da": 7.237617296596e-07, + "hu": 8.413452633249108e-06, + "ta": 3.047150585189229e-06, + "no": 4.935625952384726e-07, + "th": 2.3967695597093552e-05, + "ur": 1.823366073949728e-05, + "hr": 3.243675109843025e-06, + "bg": 5.907170930186112e-07, + "lt": 1.6532277413716656e-07, + "la": 0.0001827203668653965, + "mi": 8.045069262152538e-05, + "ml": 1.6183295201699366e-06, + "cy": 6.168368418002501e-05, + "sk": 7.948966640469735e-07, + "te": 2.2119941149867373e-06, + "fa": 1.6472740753670223e-05, + "lv": 2.224660988758842e-07, + "bn": 1.0470711458765436e-05, + "sr": 5.13223938014562e-07, + "az": 6.589925192201918e-07, + "sl": 1.0286728411301738e-06, + "kn": 4.974336889063125e-07, + "et": 6.589925192201918e-07, + "mk": 2.2421092182867142e-07, + "br": 3.3016895031323656e-05, + "eu": 3.890341395162977e-05, + "is": 1.7325677958979213e-07, + "hy": 1.6438145848951535e-06, + "ne": 9.149205197900301e-07, + "mn": 2.098681579809636e-05, + "bs": 1.1565679187697242e-06, + "kk": 2.4624750949442387e-07, + "sq": 8.79870412973105e-07, + "sw": 8.886376235750504e-06, + "gl": 
4.2394538468215615e-05, + "mr": 5.725426035496639e-07, + "pa": 1.6310223145410419e-06, + "si": 1.4424012988456525e-05, + "km": 4.045314926770516e-05, + "sn": 2.304950794496108e-05, + "yo": 3.3728883863659576e-06, + "so": 2.386712480983988e-07, + "af": 9.970239034373662e-07, + "oc": 5.432133093563607e-06, + "ka": 1.0864949899769272e-06, + "be": 5.692826562153641e-06, + "tg": 1.9062889933252336e-08, + "sd": 1.7092966118070763e-06, + "gu": 3.2116733450493484e-07, + "am": 1.2976332186553918e-07, + "yi": 4.197627276880667e-06, + "lo": 2.5806517101045756e-07, + "uz": 3.42395833907716e-10, + "fo": 1.428170662620687e-06, + "ht": 2.3546556349174352e-06, + "ps": 1.520279852229578e-06, + "tk": 7.731094653351533e-10, + "nn": 0.0008993811788968742, + "mt": 1.0632426494794345e-07, + "sa": 1.7263284462387674e-05, + "lb": 3.039843265995046e-09, + "my": 2.098681579809636e-05, + "bo": 1.3031010894337669e-05, + "tl": 7.029703283478739e-06, + "mg": 1.302323138219208e-09, + "as": 4.321778419580369e-07, + "tt": 3.4580756036461935e-09, + "haw": 2.868557749025058e-05, + "ln": 8.527997579221847e-07, + "ha": 6.360374449343453e-09, + "ba": 5.355172416621201e-10, + "jw": 0.00018999911844730377, + "su": 3.0877136403262284e-09 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/jp_japanese.mp3.words.json b/tests/expected/tiny_auto/jp_japanese.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..026b0e870b9b6a0f872bacea414fc23f025575d0 --- /dev/null +++ b/tests/expected/tiny_auto/jp_japanese.mp3.words.json @@ -0,0 +1,1518 @@ +{ + "text": "いきます 入室タブでの最図免証記が実際と違う県に関するご質問いただいております同じ度面でデレクトリーごとに 別再度として管理上をしている際と針ます サプテレクトリーごとにわけたサイトはそれぞれ パブリシャーセンターに登録していくグルニュース上では 別再度として認識され パンコン愛行は最図ごと物物は正しく表示されますしかし グルニュースタブでは バブコン最図免証記ともに正しくない以上 時にいいなります こちらいたしても ではありませんパブコンはサプテレクトリーごとに 設定した", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.08, + "end": 6.62, + "text": "いきます 入室タブでの最図免証記が実際と違う県に関するご質問いただいております", + "tokens": [ + 50364, + 47348, + 220, + 
14028, + 2415, + 97, + 12144, + 28889, + 2474, + 2972, + 8661, + 3919, + 111, + 2347, + 235, + 5396, + 120, + 16958, + 5142, + 33197, + 34837, + 3193, + 49806, + 2862, + 234, + 4108, + 5196, + 95, + 22570, + 9991, + 43450, + 11361, + 32418, + 18549, + 6117, + 19420, + 50712 + ], + "temperature": 0.0, + "avg_logprob": -0.568543427909901, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.18977835774421692, + "confidence": 0.55, + "words": [ + { + "text": "いきます", + "start": 0.08, + "end": 0.6, + "confidence": 0.153 + }, + { + "text": " ", + "start": 0.6, + "end": 1.18, + "confidence": 0.304 + }, + { + "text": "入", + "start": 1.18, + "end": 1.38, + "confidence": 0.436 + }, + { + "text": "室", + "start": 1.38, + "end": 1.52, + "confidence": 0.5 + }, + { + "text": "タ", + "start": 1.52, + "end": 1.68, + "confidence": 0.2 + }, + { + "text": "ブ", + "start": 1.68, + "end": 1.78, + "confidence": 0.839 + }, + { + "text": "で", + "start": 1.78, + "end": 1.98, + "confidence": 0.947 + }, + { + "text": "の", + "start": 1.98, + "end": 2.22, + "confidence": 0.905 + }, + { + "text": "最", + "start": 2.22, + "end": 2.64, + "confidence": 0.275 + }, + { + "text": "図", + "start": 2.64, + "end": 2.78, + "confidence": 0.347 + }, + { + "text": "免", + "start": 2.78, + "end": 2.98, + "confidence": 0.647 + }, + { + "text": "証", + "start": 2.98, + "end": 3.18, + "confidence": 0.283 + }, + { + "text": "記", + "start": 3.18, + "end": 3.32, + "confidence": 0.296 + }, + { + "text": "が", + "start": 3.32, + "end": 3.5, + "confidence": 0.975 + }, + { + "text": "実", + "start": 3.5, + "end": 3.74, + "confidence": 0.342 + }, + { + "text": "際", + "start": 3.74, + "end": 3.9, + "confidence": 0.975 + }, + { + "text": "と", + "start": 3.9, + "end": 4.08, + "confidence": 0.909 + }, + { + "text": "違う", + "start": 4.08, + "end": 4.38, + "confidence": 0.799 + }, + { + "text": "県", + "start": 4.38, + "end": 4.6, + "confidence": 0.404 + }, + { + "text": "に", + "start": 4.6, + "end": 4.74, + "confidence": 
0.988 + }, + { + "text": "関", + "start": 4.74, + "end": 4.94, + "confidence": 0.544 + }, + { + "text": "する", + "start": 4.94, + "end": 5.12, + "confidence": 0.946 + }, + { + "text": "ご", + "start": 5.12, + "end": 5.3, + "confidence": 0.498 + }, + { + "text": "質", + "start": 5.3, + "end": 5.46, + "confidence": 0.943 + }, + { + "text": "問", + "start": 5.46, + "end": 5.6, + "confidence": 0.99 + }, + { + "text": "いただ", + "start": 5.6, + "end": 5.92, + "confidence": 0.923 + }, + { + "text": "いて", + "start": 5.92, + "end": 6.16, + "confidence": 0.982 + }, + { + "text": "お", + "start": 6.16, + "end": 6.26, + "confidence": 0.791 + }, + { + "text": "ります", + "start": 6.26, + "end": 6.62, + "confidence": 0.976 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.92, + "end": 12.9, + "text": "同じ度面でデレクトリーごとに 別再度として管理上をしている際と針", + "tokens": [ + 50712, + 13089, + 9257, + 13127, + 8833, + 2474, + 31327, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 220, + 16158, + 8623, + 13127, + 3193, + 8822, + 23131, + 13876, + 5708, + 5998, + 8822, + 22979, + 34837, + 3193, + 5873, + 251, + 51010 + ], + "temperature": 0.0, + "avg_logprob": -0.568543427909901, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.18977835774421692, + "confidence": 0.524, + "words": [ + { + "text": "同", + "start": 6.92, + "end": 7.3, + "confidence": 0.834 + }, + { + "text": "じ", + "start": 7.3, + "end": 7.42, + "confidence": 0.229 + }, + { + "text": "度", + "start": 7.42, + "end": 7.6, + "confidence": 0.137 + }, + { + "text": "面", + "start": 7.6, + "end": 7.76, + "confidence": 0.294 + }, + { + "text": "で", + "start": 7.76, + "end": 8.0, + "confidence": 0.571 + }, + { + "text": "デ", + "start": 8.0, + "end": 8.16, + "confidence": 0.514 + }, + { + "text": "レ", + "start": 8.16, + "end": 8.28, + "confidence": 0.478 + }, + { + "text": "ク", + "start": 8.28, + "end": 8.36, + "confidence": 0.98 + }, + { + "text": "ト", + "start": 8.36, + "end": 8.5, + "confidence": 0.984 + }, + { + "text": 
"リ", + "start": 8.5, + "end": 8.62, + "confidence": 0.967 + }, + { + "text": "ー", + "start": 8.62, + "end": 8.7, + "confidence": 0.643 + }, + { + "text": "ご", + "start": 8.7, + "end": 8.84, + "confidence": 0.228 + }, + { + "text": "と", + "start": 8.84, + "end": 8.98, + "confidence": 0.988 + }, + { + "text": "に", + "start": 8.98, + "end": 9.14, + "confidence": 0.988 + }, + { + "text": " ", + "start": 9.14, + "end": 9.38, + "confidence": 0.258 + }, + { + "text": "別", + "start": 9.38, + "end": 9.42, + "confidence": 0.401 + }, + { + "text": "再", + "start": 9.42, + "end": 9.58, + "confidence": 0.432 + }, + { + "text": "度", + "start": 9.58, + "end": 9.74, + "confidence": 0.325 + }, + { + "text": "と", + "start": 9.74, + "end": 9.9, + "confidence": 0.885 + }, + { + "text": "して", + "start": 9.9, + "end": 10.3, + "confidence": 0.998 + }, + { + "text": "管", + "start": 10.3, + "end": 10.88, + "confidence": 0.819 + }, + { + "text": "理", + "start": 10.88, + "end": 11.06, + "confidence": 1.0 + }, + { + "text": "上", + "start": 11.06, + "end": 11.38, + "confidence": 0.469 + }, + { + "text": "を", + "start": 11.38, + "end": 11.54, + "confidence": 0.966 + }, + { + "text": "して", + "start": 11.54, + "end": 11.84, + "confidence": 0.916 + }, + { + "text": "いる", + "start": 11.84, + "end": 12.16, + "confidence": 0.979 + }, + { + "text": "際", + "start": 12.16, + "end": 12.54, + "confidence": 0.121 + }, + { + "text": "と", + "start": 12.54, + "end": 12.7, + "confidence": 0.714 + }, + { + "text": "針", + "start": 12.7, + "end": 12.9, + "confidence": 0.266 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.9, + "end": 19.55, + "text": "ます サプテレクトリーごとにわけたサイトはそれぞれ パブリシャーセンターに登録していく", + "tokens": [ + 51010, + 5368, + 220, + 23607, + 20953, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 9206, + 7625, + 3368, + 23607, + 8040, + 7588, + 3065, + 13873, + 31563, + 4132, + 15096, + 239, + 28889, + 12376, + 11054, + 17233, + 3384, + 31223, + 4824, + 30736, + 4108, + 46246, 
+ 8822, + 49394, + 51340 + ], + "temperature": 0.0, + "avg_logprob": -0.568543427909901, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.18977835774421692, + "confidence": 0.715, + "words": [ + { + "text": "ます", + "start": 12.9, + "end": 13.22, + "confidence": 0.343 + }, + { + "text": " ", + "start": 13.22, + "end": 14.1, + "confidence": 0.339 + }, + { + "text": "サ", + "start": 14.1, + "end": 14.22, + "confidence": 0.553 + }, + { + "text": "プ", + "start": 14.22, + "end": 14.32, + "confidence": 0.785 + }, + { + "text": "テ", + "start": 14.32, + "end": 14.42, + "confidence": 0.619 + }, + { + "text": "レ", + "start": 14.42, + "end": 14.58, + "confidence": 0.865 + }, + { + "text": "ク", + "start": 14.58, + "end": 14.68, + "confidence": 0.996 + }, + { + "text": "ト", + "start": 14.68, + "end": 14.78, + "confidence": 0.999 + }, + { + "text": "リ", + "start": 14.78, + "end": 14.92, + "confidence": 0.996 + }, + { + "text": "ー", + "start": 14.92, + "end": 15.04, + "confidence": 0.996 + }, + { + "text": "ご", + "start": 15.04, + "end": 15.1, + "confidence": 0.965 + }, + { + "text": "と", + "start": 15.1, + "end": 15.38, + "confidence": 0.998 + }, + { + "text": "に", + "start": 15.38, + "end": 15.84, + "confidence": 0.622 + }, + { + "text": "わ", + "start": 15.84, + "end": 16.02, + "confidence": 0.734 + }, + { + "text": "け", + "start": 16.02, + "end": 16.14, + "confidence": 0.992 + }, + { + "text": "た", + "start": 16.14, + "end": 16.28, + "confidence": 0.885 + }, + { + "text": "サ", + "start": 16.28, + "end": 16.42, + "confidence": 0.304 + }, + { + "text": "イ", + "start": 16.42, + "end": 16.5, + "confidence": 0.976 + }, + { + "text": "ト", + "start": 16.5, + "end": 16.6, + "confidence": 0.933 + }, + { + "text": "は", + "start": 16.6, + "end": 17.08, + "confidence": 0.894 + }, + { + "text": "それ", + "start": 17.08, + "end": 17.46, + "confidence": 0.476 + }, + { + "text": "ぞ", + "start": 17.46, + "end": 17.64, + "confidence": 0.921 + }, + { + "text": "れ", + "start": 17.64, + 
"end": 17.72, + "confidence": 0.998 + }, + { + "text": " パ", + "start": 17.72, + "end": 17.86, + "confidence": 0.503 + }, + { + "text": "ブ", + "start": 17.86, + "end": 17.94, + "confidence": 0.96 + }, + { + "text": "リ", + "start": 17.94, + "end": 18.06, + "confidence": 0.988 + }, + { + "text": "シ", + "start": 18.06, + "end": 18.18, + "confidence": 0.544 + }, + { + "text": "ャ", + "start": 18.18, + "end": 18.26, + "confidence": 0.911 + }, + { + "text": "ー", + "start": 18.26, + "end": 18.42, + "confidence": 0.543 + }, + { + "text": "セ", + "start": 18.42, + "end": 18.44, + "confidence": 0.368 + }, + { + "text": "ン", + "start": 18.44, + "end": 18.48, + "confidence": 0.916 + }, + { + "text": "ター", + "start": 18.48, + "end": 18.6, + "confidence": 0.996 + }, + { + "text": "に", + "start": 18.6, + "end": 18.76, + "confidence": 0.703 + }, + { + "text": "登録", + "start": 18.76, + "end": 19.12, + "confidence": 0.672 + }, + { + "text": "して", + "start": 19.12, + "end": 19.38, + "confidence": 0.438 + }, + { + "text": "いく", + "start": 19.38, + "end": 19.55, + "confidence": 0.81 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 19.55, + "end": 27.08, + "text": "グルニュース上では 別再度として認識され パンコン愛行は最図ごと物物は正しく表示されます", + "tokens": [ + 51340, + 23839, + 9405, + 34737, + 26167, + 3384, + 9550, + 5708, + 16719, + 220, + 16158, + 8623, + 13127, + 3193, + 8822, + 22041, + 43143, + 6722, + 4132, + 220, + 23268, + 1047, + 111, + 18066, + 4824, + 15157, + 8082, + 3065, + 8661, + 3919, + 111, + 9991, + 3193, + 23516, + 23516, + 3065, + 15789, + 26568, + 40053, + 6722, + 4132, + 5368, + 51727 + ], + "temperature": 0.0, + "avg_logprob": -0.568543427909901, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.18977835774421692, + "confidence": 0.554, + "words": [ + { + "text": "グ", + "start": 19.55, + "end": 19.78, + "confidence": 0.699 + }, + { + "text": "ル", + "start": 19.78, + "end": 19.88, + "confidence": 0.976 + }, + { + "text": "ニ", + "start": 19.88, + "end": 20.02, + "confidence": 0.898 
+ }, + { + "text": "ュ", + "start": 20.02, + "end": 20.14, + "confidence": 0.996 + }, + { + "text": "ー", + "start": 20.14, + "end": 20.18, + "confidence": 0.994 + }, + { + "text": "ス", + "start": 20.18, + "end": 20.22, + "confidence": 0.958 + }, + { + "text": "上", + "start": 20.22, + "end": 20.42, + "confidence": 0.311 + }, + { + "text": "では", + "start": 20.42, + "end": 20.82, + "confidence": 0.786 + }, + { + "text": " ", + "start": 20.82, + "end": 21.1, + "confidence": 0.598 + }, + { + "text": "別", + "start": 21.1, + "end": 21.16, + "confidence": 0.288 + }, + { + "text": "再", + "start": 21.16, + "end": 21.34, + "confidence": 0.839 + }, + { + "text": "度", + "start": 21.34, + "end": 21.46, + "confidence": 1.0 + }, + { + "text": "と", + "start": 21.46, + "end": 21.64, + "confidence": 0.991 + }, + { + "text": "して", + "start": 21.64, + "end": 21.82, + "confidence": 0.994 + }, + { + "text": "認", + "start": 21.82, + "end": 22.04, + "confidence": 0.497 + }, + { + "text": "識", + "start": 22.04, + "end": 22.2, + "confidence": 0.99 + }, + { + "text": "さ", + "start": 22.2, + "end": 22.34, + "confidence": 0.994 + }, + { + "text": "れ", + "start": 22.34, + "end": 22.7, + "confidence": 0.958 + }, + { + "text": " ", + "start": 22.7, + "end": 22.76, + "confidence": 0.202 + }, + { + "text": "パ", + "start": 22.76, + "end": 22.9, + "confidence": 0.04 + }, + { + "text": "ン", + "start": 22.9, + "end": 23.0, + "confidence": 0.332 + }, + { + "text": "コ", + "start": 23.0, + "end": 23.24, + "confidence": 0.919 + }, + { + "text": "ン", + "start": 23.24, + "end": 23.4, + "confidence": 0.808 + }, + { + "text": "愛", + "start": 23.4, + "end": 23.48, + "confidence": 0.245 + }, + { + "text": "行", + "start": 23.48, + "end": 23.64, + "confidence": 0.133 + }, + { + "text": "は", + "start": 23.64, + "end": 23.86, + "confidence": 0.959 + }, + { + "text": "最", + "start": 23.86, + "end": 24.1, + "confidence": 0.723 + }, + { + "text": "図", + "start": 24.1, + "end": 24.3, + "confidence": 0.948 + }, + { + 
"text": "ご", + "start": 24.3, + "end": 24.42, + "confidence": 0.154 + }, + { + "text": "と", + "start": 24.42, + "end": 24.52, + "confidence": 0.969 + }, + { + "text": "物", + "start": 24.52, + "end": 24.76, + "confidence": 0.238 + }, + { + "text": "物", + "start": 24.76, + "end": 24.92, + "confidence": 0.194 + }, + { + "text": "は", + "start": 24.92, + "end": 25.08, + "confidence": 0.567 + }, + { + "text": "正", + "start": 25.08, + "end": 25.36, + "confidence": 0.454 + }, + { + "text": "しく", + "start": 25.36, + "end": 25.64, + "confidence": 0.955 + }, + { + "text": "表示", + "start": 25.64, + "end": 26.0, + "confidence": 0.422 + }, + { + "text": "さ", + "start": 26.0, + "end": 26.28, + "confidence": 0.71 + }, + { + "text": "れ", + "start": 26.28, + "end": 26.62, + "confidence": 0.976 + }, + { + "text": "ます", + "start": 26.62, + "end": 27.08, + "confidence": 0.474 + } + ] + }, + { + "id": 4, + "seek": 2726, + "start": 27.36, + "end": 32.39, + "text": "しかし グルニュースタブでは バブコン最図免証記ともに正しくない", + "tokens": [ + 50364, + 32156, + 2849, + 220, + 23839, + 9405, + 34737, + 26167, + 3384, + 9550, + 12144, + 28889, + 16719, + 15096, + 238, + 28889, + 18066, + 4824, + 8661, + 3919, + 111, + 2347, + 235, + 5396, + 120, + 16958, + 3193, + 4801, + 4108, + 15789, + 26568, + 9311, + 50620 + ], + "temperature": 0.0, + "avg_logprob": -0.44633566366659627, + "compression_ratio": 1.375, + "no_speech_prob": 0.35541412234306335, + "confidence": 0.806, + "words": [ + { + "text": "しか", + "start": 27.36, + "end": 27.58, + "confidence": 0.958 + }, + { + "text": "し", + "start": 27.58, + "end": 27.74, + "confidence": 0.992 + }, + { + "text": " ", + "start": 27.74, + "end": 27.84, + "confidence": 0.249 + }, + { + "text": "グ", + "start": 27.84, + "end": 27.92, + "confidence": 0.445 + }, + { + "text": "ル", + "start": 27.92, + "end": 28.04, + "confidence": 0.995 + }, + { + "text": "ニ", + "start": 28.04, + "end": 28.24, + "confidence": 0.885 + }, + { + "text": "ュ", + "start": 28.24, + "end": 28.7, + 
"confidence": 0.994 + }, + { + "text": "ー", + "start": 28.7, + "end": 29.18, + "confidence": 0.999 + }, + { + "text": "ス", + "start": 29.18, + "end": 29.2, + "confidence": 0.968 + }, + { + "text": "タ", + "start": 29.2, + "end": 29.34, + "confidence": 0.992 + }, + { + "text": "ブ", + "start": 29.34, + "end": 29.44, + "confidence": 0.996 + }, + { + "text": "では", + "start": 29.44, + "end": 29.7, + "confidence": 0.979 + }, + { + "text": " バ", + "start": 29.7, + "end": 30.06, + "confidence": 0.502 + }, + { + "text": "ブ", + "start": 30.06, + "end": 30.16, + "confidence": 0.629 + }, + { + "text": "コ", + "start": 30.16, + "end": 30.34, + "confidence": 0.636 + }, + { + "text": "ン", + "start": 30.34, + "end": 30.42, + "confidence": 0.982 + }, + { + "text": "最", + "start": 30.42, + "end": 30.6, + "confidence": 0.66 + }, + { + "text": "図", + "start": 30.6, + "end": 30.88, + "confidence": 0.957 + }, + { + "text": "免", + "start": 30.88, + "end": 30.94, + "confidence": 0.977 + }, + { + "text": "証", + "start": 30.94, + "end": 31.12, + "confidence": 0.997 + }, + { + "text": "記", + "start": 31.12, + "end": 31.26, + "confidence": 0.994 + }, + { + "text": "と", + "start": 31.26, + "end": 31.42, + "confidence": 0.726 + }, + { + "text": "も", + "start": 31.42, + "end": 31.54, + "confidence": 0.836 + }, + { + "text": "に", + "start": 31.54, + "end": 31.66, + "confidence": 0.946 + }, + { + "text": "正", + "start": 31.66, + "end": 31.96, + "confidence": 0.482 + }, + { + "text": "しく", + "start": 31.96, + "end": 32.18, + "confidence": 0.999 + }, + { + "text": "ない", + "start": 32.18, + "end": 32.39, + "confidence": 0.937 + } + ] + }, + { + "id": 5, + "seek": 2726, + "start": 32.39, + "end": 36.34, + "text": "以上 時にいいなります こちらいたしても ではありません", + "tokens": [ + 50620, + 29497, + 220, + 6611, + 4108, + 13806, + 3203, + 19420, + 14384, + 28567, + 17679, + 8822, + 4801, + 220, + 16719, + 14498, + 30250, + 50838 + ], + "temperature": 0.0, + "avg_logprob": -0.44633566366659627, + "compression_ratio": 1.375, + 
"no_speech_prob": 0.35541412234306335, + "confidence": 0.456, + "words": [ + { + "text": "以上", + "start": 32.39, + "end": 32.74, + "confidence": 0.31 + }, + { + "text": " ", + "start": 32.74, + "end": 33.04, + "confidence": 0.126 + }, + { + "text": "時", + "start": 33.04, + "end": 33.1, + "confidence": 0.35 + }, + { + "text": "に", + "start": 33.1, + "end": 33.12, + "confidence": 0.786 + }, + { + "text": "いい", + "start": 33.12, + "end": 33.36, + "confidence": 0.613 + }, + { + "text": "な", + "start": 33.36, + "end": 33.48, + "confidence": 0.793 + }, + { + "text": "ります", + "start": 33.48, + "end": 34.02, + "confidence": 0.895 + }, + { + "text": " こ", + "start": 34.02, + "end": 34.38, + "confidence": 0.329 + }, + { + "text": "ちら", + "start": 34.38, + "end": 34.56, + "confidence": 0.683 + }, + { + "text": "いた", + "start": 34.56, + "end": 34.84, + "confidence": 0.572 + }, + { + "text": "して", + "start": 34.84, + "end": 35.04, + "confidence": 0.44 + }, + { + "text": "も", + "start": 35.04, + "end": 35.18, + "confidence": 0.419 + }, + { + "text": " ", + "start": 35.18, + "end": 35.38, + "confidence": 0.247 + }, + { + "text": "では", + "start": 35.38, + "end": 35.46, + "confidence": 0.141 + }, + { + "text": "あり", + "start": 35.46, + "end": 35.72, + "confidence": 0.906 + }, + { + "text": "ません", + "start": 35.72, + "end": 36.34, + "confidence": 0.997 + } + ] + }, + { + "id": 6, + "seek": 2726, + "start": 36.42, + "end": 40.4, + "text": "パブコンはサプテレクトリーごとに 設定した", + "tokens": [ + 50838, + 23268, + 28889, + 18066, + 4824, + 3065, + 23607, + 20953, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 220, + 39035, + 12088, + 8533, + 51042 + ], + "temperature": 0.0, + "avg_logprob": -0.44633566366659627, + "compression_ratio": 1.375, + "no_speech_prob": 0.35541412234306335, + "confidence": 0.848, + "words": [ + { + "text": "パ", + "start": 36.42, + "end": 36.92, + "confidence": 0.477 + }, + { + "text": "ブ", + "start": 36.92, + "end": 37.04, + "confidence": 0.649 + 
}, + { + "text": "コ", + "start": 37.04, + "end": 37.2, + "confidence": 0.678 + }, + { + "text": "ン", + "start": 37.2, + "end": 37.72, + "confidence": 0.994 + }, + { + "text": "は", + "start": 37.72, + "end": 37.74, + "confidence": 0.975 + }, + { + "text": "サ", + "start": 37.74, + "end": 38.22, + "confidence": 0.667 + }, + { + "text": "プ", + "start": 38.22, + "end": 38.32, + "confidence": 0.572 + }, + { + "text": "テ", + "start": 38.32, + "end": 38.42, + "confidence": 0.984 + }, + { + "text": "レ", + "start": 38.42, + "end": 38.56, + "confidence": 0.997 + }, + { + "text": "ク", + "start": 38.56, + "end": 38.68, + "confidence": 0.997 + }, + { + "text": "ト", + "start": 38.68, + "end": 38.78, + "confidence": 0.998 + }, + { + "text": "リ", + "start": 38.78, + "end": 38.92, + "confidence": 0.999 + }, + { + "text": "ー", + "start": 38.92, + "end": 38.96, + "confidence": 0.987 + }, + { + "text": "ご", + "start": 38.96, + "end": 39.08, + "confidence": 0.959 + }, + { + "text": "と", + "start": 39.08, + "end": 39.26, + "confidence": 0.999 + }, + { + "text": "に", + "start": 39.26, + "end": 39.58, + "confidence": 0.974 + }, + { + "text": " ", + "start": 39.58, + "end": 39.66, + "confidence": 0.666 + }, + { + "text": "設", + "start": 39.66, + "end": 39.82, + "confidence": 0.912 + }, + { + "text": "定", + "start": 39.82, + "end": 40.02, + "confidence": 0.999 + }, + { + "text": "した", + "start": 40.02, + "end": 40.4, + "confidence": 0.873 + } + ] + } + ], + "language": "Japanese" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/laugh1.mp3.words.json b/tests/expected/tiny_auto/laugh1.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..50b70f39baaa87556c0f3cf8336a32055e01789f --- /dev/null +++ b/tests/expected/tiny_auto/laugh1.mp3.words.json @@ -0,0 +1,163 @@ +{ + "text": " You can't do it, man.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 1.44, + "text": " You can't do it, man.", + "tokens": [ + 50364, + 509, + 393, 
+ 380, + 360, + 309, + 11, + 587, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -1.4257353869351475, + "compression_ratio": 0.7241379310344828, + "no_speech_prob": 0.4177960157394409, + "confidence": 0.189, + "words": [ + { + "text": "You", + "start": 0.0, + "end": 0.56, + "confidence": 0.081 + }, + { + "text": "can't", + "start": 0.56, + "end": 0.82, + "confidence": 0.302 + }, + { + "text": "do", + "start": 0.82, + "end": 1.04, + "confidence": 0.118 + }, + { + "text": "it,", + "start": 1.04, + "end": 1.2, + "confidence": 0.372 + }, + { + "text": "man.", + "start": 1.26, + "end": 1.44, + "confidence": 0.142 + } + ] + } + ], + "language": "en", + "language_probs": { + "en": 0.22957198321819305, + "zh": 0.008901471272110939, + "de": 0.02785024233162403, + "es": 0.03660845384001732, + "ru": 0.01812249794602394, + "ko": 0.06083052232861519, + "fr": 0.050037823617458344, + "ja": 0.027633508667349815, + "pt": 0.053264982998371124, + "tr": 0.052850473672151566, + "pl": 0.025959279388189316, + "ca": 0.001167618902400136, + "nl": 0.012262295000255108, + "ar": 0.04627741873264313, + "sv": 0.05804488807916641, + "it": 0.018264634534716606, + "id": 0.0061658695340156555, + "hi": 0.028734305873513222, + "fi": 0.00779438903555274, + "vi": 0.0017391633009538054, + "he": 0.006262968294322491, + "uk": 0.004172035492956638, + "el": 0.02326974831521511, + "ms": 0.002322087762877345, + "cs": 0.00351320649497211, + "ro": 0.003798681776970625, + "da": 0.004877604078501463, + "hu": 0.019748777151107788, + "ta": 0.004107354674488306, + "no": 0.005232905503362417, + "th": 0.006824995391070843, + "ur": 0.03660845384001732, + "hr": 0.0009567126980982721, + "bg": 0.000524235307238996, + "lt": 0.0007900504861027002, + "la": 0.005232905503362417, + "mi": 0.0026312703266739845, + "ml": 0.0026312703266739845, + "cy": 0.01490716077387333, + "sk": 0.00025649185408838093, + "te": 0.0010840975446626544, + "fa": 0.00646177539601922, + "lv": 0.00022284396982286125, + "bn": 
0.001851329579949379, + "sr": 0.0006549748941324651, + "az": 0.0015169251710176468, + "sl": 0.0012332515325397253, + "kn": 0.00015256117330864072, + "et": 0.000547251955140382, + "mk": 0.00016367423813790083, + "br": 0.0017665510531514883, + "eu": 0.00125267228577286, + "is": 0.0007364080520346761, + "hy": 0.0003069805388804525, + "ne": 0.00043801500578410923, + "mn": 0.0007509324350394309, + "bs": 0.0013650848995894194, + "kk": 8.557905675843358e-05, + "sq": 0.0002595153055153787, + "sw": 0.001292436383664608, + "gl": 0.004801983945071697, + "mr": 0.00020852641318924725, + "pa": 0.0009345505386590958, + "si": 0.0011860063532367349, + "km": 0.0007249911432154477, + "sn": 0.001986186718568206, + "yo": 0.0008987485198304057, + "so": 4.6166442189132795e-05, + "af": 0.00015137392620090395, + "oc": 0.0004773216787725687, + "ka": 0.00022284396982286125, + "be": 0.0003926341305486858, + "tg": 3.2477300919708796e-06, + "sd": 0.002845081500709057, + "gu": 0.000468089448986575, + "am": 8.230057574110106e-05, + "yi": 0.0008279658504761755, + "lo": 7.122521492419764e-05, + "uz": 4.292865796173828e-08, + "fo": 0.0010883405338972807, + "ht": 0.0015408133622258902, + "ps": 0.0002698532189242542, + "tk": 3.5196916314816917e-07, + "nn": 0.007979228161275387, + "mt": 0.0004867360112257302, + "sa": 0.001292436383664608, + "lb": 7.456646926584654e-07, + "my": 0.0004212349303998053, + "bo": 0.0012380782281979918, + "tl": 0.011340769939124584, + "mg": 4.152275892010948e-07, + "as": 0.00019742883159779012, + "tt": 1.6014748780435184e-06, + "haw": 0.004511046223342419, + "ln": 8.008066652109846e-05, + "ha": 7.505962571485725e-07, + "ba": 2.516638630822854e-07, + "jw": 0.01032588817179203, + "su": 1.0538688002270646e-06 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/laugh2.mp3.words.json b/tests/expected/tiny_auto/laugh2.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..c232ea2208c9f7c8f1c1ead8158a46d82002dff0 --- /dev/null +++ 
b/tests/expected/tiny_auto/laugh2.mp3.words.json @@ -0,0 +1,133 @@ +{ + "text": " Haha.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.18, + "end": 0.56, + "text": " Haha.", + "tokens": [ + 50364, + 19131, + 13, + 50401 + ], + "temperature": 0.0, + "avg_logprob": -1.3161625862121582, + "compression_ratio": 0.38461538461538464, + "no_speech_prob": 0.5727680921554565, + "confidence": 0.079, + "words": [ + { + "text": "Haha.", + "start": 0.18, + "end": 0.56, + "confidence": 0.079 + } + ] + } + ], + "language": "en", + "language_probs": { + "en": 0.6554492712020874, + "zh": 0.038450948894023895, + "de": 0.025613853707909584, + "es": 0.012004972435534, + "ru": 0.01136607863008976, + "ko": 0.04638069495558739, + "fr": 0.024061987176537514, + "ja": 0.014256256632506847, + "pt": 0.019334351643919945, + "tr": 0.009204532019793987, + "pl": 0.007997035048902035, + "ca": 0.000376959826098755, + "nl": 0.027695180848240852, + "ar": 0.0034127160906791687, + "sv": 0.004701214376837015, + "it": 0.0023091589100658894, + "id": 0.003231094218790531, + "hi": 0.019183890894055367, + "fi": 0.0003843946906272322, + "vi": 0.0028737946413457394, + "he": 0.0006487889331765473, + "uk": 0.000761479779612273, + "el": 0.0010572100291028619, + "ms": 0.0036900255363434553, + "cs": 0.0018195734592154622, + "ro": 0.003359806491062045, + "da": 0.001357484608888626, + "hu": 0.0012752386974170804, + "ta": 0.0015932717360556126, + "no": 0.0012802298879250884, + "th": 0.00844655279070139, + "ur": 0.002102513099089265, + "hr": 0.00023867608979344368, + "bg": 0.00032117380760610104, + "lt": 0.00015112028631847352, + "la": 0.001877332921139896, + "mi": 0.0015442520380020142, + "ml": 0.0010127451969310641, + "cy": 0.013288293033838272, + "sk": 0.0001627629535505548, + "te": 0.001107946620322764, + "fa": 0.00035550701431930065, + "lv": 0.00010549896251177415, + "bn": 0.0016894169384613633, + "sr": 9.094579581869766e-05, + "az": 9.949499508365989e-05, + "sl": 0.00024243468942586333, + "kn": 
2.7845457225339487e-05, + "et": 4.004289075965062e-05, + "mk": 3.082211333094165e-05, + "br": 0.0008528169128112495, + "eu": 9.988441888708621e-05, + "is": 0.000155309506226331, + "hy": 0.00014704407658427954, + "ne": 0.00018086731142830104, + "mn": 0.0002652243711054325, + "bs": 0.00024243468942586333, + "kk": 2.7200421754969284e-05, + "sq": 6.177784962346777e-05, + "sw": 0.00029819979681633413, + "gl": 0.0007209545583464205, + "mr": 0.00017258478328585625, + "pa": 8.057351806201041e-05, + "si": 0.0005132342921569943, + "km": 0.000761479779612273, + "sn": 0.0008137635886669159, + "yo": 0.00030646618688479066, + "so": 3.4012084597634384e-06, + "af": 4.6269477024907246e-05, + "oc": 0.00016212840273510665, + "ka": 4.3636271584546193e-05, + "be": 7.779005682095885e-05, + "tg": 2.468644311193202e-07, + "sd": 0.00021987872605677694, + "gu": 8.849268488120288e-05, + "am": 1.851288106990978e-05, + "yi": 0.00012382352724671364, + "lo": 5.346427860786207e-05, + "uz": 3.715637353352008e-09, + "fo": 0.00022863772755954415, + "ht": 0.00039351038867607713, + "ps": 4.6089087845757604e-05, + "tk": 2.928288722614525e-08, + "nn": 0.006375792436301708, + "mt": 3.385146919754334e-05, + "sa": 0.0004172564949840307, + "lb": 1.5539163200628536e-07, + "my": 0.00015837271348573267, + "bo": 0.00055493856780231, + "tl": 0.0009588479297235608, + "mg": 3.752660759914761e-08, + "as": 4.3466148781590164e-05, + "tt": 1.5722335433565604e-07, + "haw": 0.0029650188516825438, + "ln": 1.3386517821345478e-05, + "ha": 5.1092818864617584e-08, + "ba": 2.5590921381990483e-08, + "jw": 0.003205949906259775, + "su": 1.030089435971604e-07 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/punctuations.mp3.words.json b/tests/expected/tiny_auto/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..1c08f324250fa9bc99fa513e7c08bc6fa36c7ce2 --- /dev/null +++ b/tests/expected/tiny_auto/punctuations.mp3.words.json @@ -0,0 +1,172 @@ +{ + "text": " Dima, 
est ce que l'on vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.58, + "text": " Dima, est ce que l'on vole ?", + "tokens": [ + 50364, + 413, + 4775, + 11, + 871, + 1769, + 631, + 287, + 6, + 266, + 49877, + 2506, + 50494 + ], + "temperature": 0.0, + "avg_logprob": -0.84262786592756, + "compression_ratio": 0.7777777777777778, + "no_speech_prob": 0.0010857833549380302, + "confidence": 0.456, + "words": [ + { + "text": "Dima,", + "start": 0.42, + "end": 0.84, + "confidence": 0.247 + }, + { + "text": "est", + "start": 1.2, + "end": 1.4, + "confidence": 0.93 + }, + { + "text": "ce", + "start": 1.4, + "end": 1.56, + "confidence": 0.587 + }, + { + "text": "que", + "start": 1.56, + "end": 1.68, + "confidence": 0.917 + }, + { + "text": "l'on", + "start": 1.68, + "end": 2.02, + "confidence": 0.482 + }, + { + "text": "vole ?", + "start": 2.02, + "end": 2.58, + "confidence": 0.249 + } + ] + } + ], + "language": "fr", + "language_probs": { + "en": 0.0004426497616805136, + "zh": 4.149576125200838e-05, + "de": 8.061250991886482e-05, + "es": 2.3643573513254523e-05, + "ru": 2.6583182261674665e-05, + "ko": 3.125686362182023e-06, + "fr": 0.9882750511169434, + "ja": 9.954315464710817e-05, + "pt": 0.001112821395508945, + "tr": 4.917394107906148e-06, + "pl": 1.275475096917944e-05, + "ca": 6.974348707444733e-07, + "nl": 4.4867618271382526e-05, + "ar": 7.226058369269595e-05, + "sv": 2.070300433842931e-05, + "it": 0.0011041612597182393, + "id": 8.47865123887459e-07, + "hi": 1.3029798537900206e-06, + "fi": 7.733837747991856e-08, + "vi": 1.0227206530544208e-06, + "he": 0.00013500054774340242, + "uk": 1.5260881980339036e-07, + "el": 2.256085463159252e-05, + "ms": 2.1866731913178228e-05, + "cs": 2.845969675036031e-06, + "ro": 1.1981979696429335e-05, + "da": 2.890787072828971e-06, + "hu": 5.885350674361689e-06, + "ta": 6.410619590724309e-09, + "no": 5.347418436940643e-07, + "th": 1.6343028619303368e-06, + "ur": 1.8374961427980452e-06, + "hr": 
1.6563056703944312e-08, + "bg": 3.927567036043911e-07, + "lt": 1.0610692768864283e-08, + "la": 0.0002483040152583271, + "mi": 3.675223183563503e-07, + "ml": 9.69895896929529e-09, + "cy": 1.6123514797072858e-05, + "sk": 1.1035021429961489e-07, + "te": 1.5406043107901723e-09, + "fa": 8.347201401193161e-07, + "lv": 1.1562876878201678e-08, + "bn": 2.817891413542384e-07, + "sr": 2.631305839884135e-08, + "az": 8.493960024225089e-08, + "sl": 1.6343028619303368e-06, + "kn": 9.533188682908644e-10, + "et": 2.3911566060519363e-08, + "mk": 7.041739991109353e-08, + "br": 0.006255499552935362, + "eu": 5.113281076774001e-06, + "is": 4.954483046049063e-08, + "hy": 1.5142120446398621e-07, + "ne": 3.3518703368429215e-09, + "mn": 5.44143468061975e-08, + "bs": 3.0050458121877455e-08, + "kk": 4.082804316851707e-09, + "sq": 8.9203280140282e-07, + "sw": 8.578595043218229e-07, + "gl": 2.0066037905053236e-05, + "mr": 4.1877967760228785e-09, + "pa": 1.5501206007684232e-07, + "si": 4.0364432152273366e-07, + "km": 4.556057433546812e-07, + "sn": 9.856106771621853e-06, + "yo": 3.807865141425282e-05, + "so": 3.3918873043603526e-08, + "af": 1.2072351296410488e-07, + "oc": 0.0013957907212898135, + "ka": 9.512783094578481e-08, + "be": 3.131329435746011e-07, + "tg": 5.366745270407591e-09, + "sd": 8.412670240431908e-07, + "gu": 1.056932585896675e-08, + "am": 3.313314422825897e-08, + "yi": 6.4637929426680785e-06, + "lo": 3.0587924015890167e-07, + "uz": 7.609391415586142e-12, + "fo": 1.476469492445176e-06, + "ht": 0.00015660336066503078, + "ps": 1.3679765231700003e-07, + "tk": 1.9443653631601165e-10, + "nn": 8.12447615317069e-05, + "mt": 3.942939201806439e-07, + "sa": 1.9826342168016708e-07, + "lb": 5.294661065136097e-08, + "my": 1.9407825675443746e-06, + "bo": 1.593091383256251e-07, + "tl": 5.999636698561517e-08, + "mg": 8.532987094689304e-10, + "as": 6.023119425435652e-08, + "tt": 1.1589410542356404e-09, + "haw": 0.00011547208850970492, + "ln": 5.851840614923276e-05, + "ha": 9.052323890479386e-10, + 
"ba": 3.292170147606299e-10, + "jw": 3.3533717669342877e-06, + "su": 2.532218323914037e-10 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/radio_short.mp3.words.json b/tests/expected/tiny_auto/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b59ddf126dbb9cf83dfaae7a674dd01344623c28 --- /dev/null +++ b/tests/expected/tiny_auto/radio_short.mp3.words.json @@ -0,0 +1,2934 @@ +{ + "text": " What are you telling me, guys? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, guys? What are you telling me, guys? What are you telling me, dude? What are you telling me, dude? The most important thing is not to be alert. It's what you do. Yes, man. The reference of the book. Good evening, good evening. All of you, this is your BFFM TV. It's called the BFFM story with the actuality. There are 60 minutes of the report of the analysis of the actions you need. As you can see, it's a bit of a report of the reports of the reports. So, what is it, guys? So, we are ready to do it. The report is ready to be told by the leaders of the CFT's CFT's CFT's CFT. 
The report of the CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's C The CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's C", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 5.0, + "end": 6.12, + "text": " What are you telling me, guys?", + "tokens": [ + 50364, + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.39788139157178926, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5095687508583069, + "confidence": 0.342, + "words": [ + { + "text": "What", + "start": 5.0, + "end": 5.22, + "confidence": 0.041 + }, + { + "text": "are", + "start": 5.22, + "end": 5.3, + "confidence": 0.602 + }, + { + "text": "you", + "start": 5.3, + "end": 5.46, + "confidence": 0.97 + }, + { + "text": "telling", + "start": 5.46, + "end": 5.7, + "confidence": 0.355 + }, + { + "text": "me,", + "start": 5.7, + "end": 5.94, + "confidence": 0.612 + }, + { + "text": "guys?", + "start": 6.02, + "end": 6.12, + "confidence": 0.304 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.76, + "end": 7.57, + "text": " What are you telling me, dude?", + "tokens": [ + 50664, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 50764 + ], + "temperature": 0.0, + 
"avg_logprob": -0.39788139157178926, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5095687508583069, + "confidence": 0.678, + "words": [ + { + "text": "What", + "start": 6.76, + "end": 6.92, + "confidence": 0.886 + }, + { + "text": "are", + "start": 6.92, + "end": 7.06, + "confidence": 0.936 + }, + { + "text": "you", + "start": 7.06, + "end": 7.18, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 7.18, + "end": 7.3, + "confidence": 0.905 + }, + { + "text": "me,", + "start": 7.3, + "end": 7.44, + "confidence": 0.984 + }, + { + "text": "dude?", + "start": 7.48, + "end": 7.57, + "confidence": 0.133 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 7.57, + "end": 9.36, + "text": " What are you telling me, dude?", + "tokens": [ + 50764, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.39788139157178926, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5095687508583069, + "confidence": 0.826, + "words": [ + { + "text": "What", + "start": 7.57, + "end": 9.12, + "confidence": 0.543 + }, + { + "text": "are", + "start": 9.12, + "end": 9.28, + "confidence": 0.851 + }, + { + "text": "you", + "start": 9.28, + "end": 9.3, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 9.3, + "end": 9.32, + "confidence": 0.849 + }, + { + "text": "me,", + "start": 9.32, + "end": 9.34, + "confidence": 0.951 + }, + { + "text": "dude?", + "start": 9.34, + "end": 9.36, + "confidence": 0.858 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 11.22, + "end": 12.5, + "text": " What are you telling me, dude?", + "tokens": [ + 50864, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.39788139157178926, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5095687508583069, + "confidence": 0.774, + "words": [ + { + "text": "What", + "start": 11.22, + "end": 11.44, + "confidence": 0.422 + }, + { + "text": "are", + 
"start": 11.44, + "end": 11.56, + "confidence": 0.79 + }, + { + "text": "you", + "start": 11.56, + "end": 11.66, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 11.66, + "end": 11.68, + "confidence": 0.809 + }, + { + "text": "me,", + "start": 11.68, + "end": 12.48, + "confidence": 0.957 + }, + { + "text": "dude?", + "start": 12.48, + "end": 12.5, + "confidence": 0.837 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 12.5, + "end": 13.54, + "text": " What are you telling me, guys?", + "tokens": [ + 50964, + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.39788139157178926, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5095687508583069, + "confidence": 0.76, + "words": [ + { + "text": "What", + "start": 12.5, + "end": 12.52, + "confidence": 0.503 + }, + { + "text": "are", + "start": 12.52, + "end": 12.54, + "confidence": 0.845 + }, + { + "text": "you", + "start": 12.54, + "end": 12.64, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 12.64, + "end": 12.66, + "confidence": 0.847 + }, + { + "text": "me,", + "start": 12.66, + "end": 13.04, + "confidence": 0.962 + }, + { + "text": "guys?", + "start": 13.52, + "end": 13.54, + "confidence": 0.561 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 20.32, + "end": 21.4, + "text": " What are you telling me, guys?", + "tokens": [ + 51164, + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.39788139157178926, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5095687508583069, + "confidence": 0.936, + "words": [ + { + "text": "What", + "start": 20.32, + "end": 20.72, + "confidence": 0.886 + }, + { + "text": "are", + "start": 20.72, + "end": 20.78, + "confidence": 0.936 + }, + { + "text": "you", + "start": 20.78, + "end": 20.8, + "confidence": 0.996 + }, + { + "text": "telling", + "start": 20.8, + "end": 21.04, + "confidence": 0.935 + }, + { + 
"text": "me,", + "start": 21.04, + "end": 21.3, + "confidence": 0.897 + }, + { + "text": "guys?", + "start": 21.38, + "end": 21.4, + "confidence": 0.969 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 22.08, + "end": 22.96, + "text": " What are you telling me, dude?", + "tokens": [ + 51414, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 51514 + ], + "temperature": 0.0, + "avg_logprob": -0.39788139157178926, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5095687508583069, + "confidence": 0.954, + "words": [ + { + "text": "What", + "start": 22.08, + "end": 22.24, + "confidence": 0.951 + }, + { + "text": "are", + "start": 22.24, + "end": 22.36, + "confidence": 0.965 + }, + { + "text": "you", + "start": 22.36, + "end": 22.44, + "confidence": 0.997 + }, + { + "text": "telling", + "start": 22.44, + "end": 22.6, + "confidence": 0.863 + }, + { + "text": "me,", + "start": 22.6, + "end": 22.78, + "confidence": 0.984 + }, + { + "text": "dude?", + "start": 22.94, + "end": 22.96, + "confidence": 0.967 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 22.96, + "end": 23.08, + "text": " What are you telling me, dude?", + "tokens": [ + 51514, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.39788139157178926, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5095687508583069, + "confidence": 0.856, + "words": [ + { + "text": "What", + "start": 22.96, + "end": 22.98, + "confidence": 0.541 + }, + { + "text": "are", + "start": 22.98, + "end": 23.0, + "confidence": 0.888 + }, + { + "text": "you", + "start": 23.0, + "end": 23.02, + "confidence": 0.996 + }, + { + "text": "telling", + "start": 23.02, + "end": 23.04, + "confidence": 0.882 + }, + { + "text": "me,", + "start": 23.04, + "end": 23.06, + "confidence": 0.975 + }, + { + "text": "dude?", + "start": 23.06, + "end": 23.08, + "confidence": 0.955 + } + ] + }, + { + "id": 8, + "seek": 2600, + "start": 26.54, + "end": 32.48, + 
"text": " The most important thing is not to be alert.", + "tokens": [ + 50414, + 440, + 881, + 1021, + 551, + 307, + 406, + 281, + 312, + 9615, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.9188976549122432, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09050824493169785, + "confidence": 0.277, + "words": [ + { + "text": "The", + "start": 26.54, + "end": 31.02, + "confidence": 0.183 + }, + { + "text": "most", + "start": 31.02, + "end": 31.18, + "confidence": 0.517 + }, + { + "text": "important", + "start": 31.18, + "end": 31.5, + "confidence": 0.995 + }, + { + "text": "thing", + "start": 31.5, + "end": 31.68, + "confidence": 0.218 + }, + { + "text": "is", + "start": 31.68, + "end": 31.9, + "confidence": 0.508 + }, + { + "text": "not", + "start": 31.9, + "end": 32.22, + "confidence": 0.285 + }, + { + "text": "to", + "start": 32.22, + "end": 32.4, + "confidence": 0.344 + }, + { + "text": "be", + "start": 32.4, + "end": 32.42, + "confidence": 0.247 + }, + { + "text": "alert.", + "start": 32.42, + "end": 32.48, + "confidence": 0.038 + } + ] + }, + { + "id": 9, + "seek": 2600, + "start": 33.42, + "end": 34.06, + "text": " It's what you do.", + "tokens": [ + 50664, + 467, + 311, + 437, + 291, + 360, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.9188976549122432, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09050824493169785, + "confidence": 0.503, + "words": [ + { + "text": "It's", + "start": 33.42, + "end": 33.64, + "confidence": 0.307 + }, + { + "text": "what", + "start": 33.64, + "end": 33.72, + "confidence": 0.615 + }, + { + "text": "you", + "start": 33.72, + "end": 33.88, + "confidence": 0.897 + }, + { + "text": "do.", + "start": 33.88, + "end": 34.06, + "confidence": 0.618 + } + ] + }, + { + "id": 10, + "seek": 2600, + "start": 34.86, + "end": 35.5, + "text": " Yes, man.", + "tokens": [ + 50764, + 1079, + 11, + 587, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.9188976549122432, + 
"compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09050824493169785, + "confidence": 0.089, + "words": [ + { + "text": "Yes,", + "start": 34.86, + "end": 34.88, + "confidence": 0.066 + }, + { + "text": "man.", + "start": 35.48, + "end": 35.5, + "confidence": 0.119 + } + ] + }, + { + "id": 11, + "seek": 2600, + "start": 37.14, + "end": 38.64, + "text": " The reference of the book.", + "tokens": [ + 50864, + 440, + 6408, + 295, + 264, + 1446, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.9188976549122432, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09050824493169785, + "confidence": 0.268, + "words": [ + { + "text": "The", + "start": 37.14, + "end": 37.28, + "confidence": 0.385 + }, + { + "text": "reference", + "start": 37.28, + "end": 37.74, + "confidence": 0.542 + }, + { + "text": "of", + "start": 37.74, + "end": 37.92, + "confidence": 0.782 + }, + { + "text": "the", + "start": 37.92, + "end": 38.2, + "confidence": 0.371 + }, + { + "text": "book.", + "start": 38.2, + "end": 38.64, + "confidence": 0.023 + } + ] + }, + { + "id": 12, + "seek": 2600, + "start": 44.94, + "end": 45.65, + "text": " Good evening, good evening.", + "tokens": [ + 51264, + 2205, + 5634, + 11, + 665, + 5634, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.9188976549122432, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09050824493169785, + "confidence": 0.398, + "words": [ + { + "text": "Good", + "start": 44.94, + "end": 45.16, + "confidence": 0.134 + }, + { + "text": "evening,", + "start": 45.16, + "end": 45.4, + "confidence": 0.783 + }, + { + "text": "good", + "start": 45.46, + "end": 45.58, + "confidence": 0.328 + }, + { + "text": "evening.", + "start": 45.58, + "end": 45.65, + "confidence": 0.731 + } + ] + }, + { + "id": 13, + "seek": 2600, + "start": 45.65, + "end": 47.76, + "text": " All of you, this is your BFFM TV.", + "tokens": [ + 51364, + 1057, + 295, + 291, + 11, + 341, + 307, + 428, + 363, + 6345, + 
44, + 3558, + 13, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.9188976549122432, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09050824493169785, + "confidence": 0.327, + "words": [ + { + "text": "All", + "start": 45.65, + "end": 46.28, + "confidence": 0.146 + }, + { + "text": "of", + "start": 46.28, + "end": 46.3, + "confidence": 0.356 + }, + { + "text": "you,", + "start": 46.3, + "end": 46.5, + "confidence": 0.954 + }, + { + "text": "this", + "start": 46.54, + "end": 46.68, + "confidence": 0.225 + }, + { + "text": "is", + "start": 46.68, + "end": 46.82, + "confidence": 0.252 + }, + { + "text": "your", + "start": 46.82, + "end": 47.08, + "confidence": 0.185 + }, + { + "text": "BFFM", + "start": 47.08, + "end": 47.62, + "confidence": 0.326 + }, + { + "text": "TV.", + "start": 47.62, + "end": 47.76, + "confidence": 0.777 + } + ] + }, + { + "id": 14, + "seek": 2600, + "start": 47.76, + "end": 51.42, + "text": " It's called the BFFM story with the actuality.", + "tokens": [ + 51464, + 467, + 311, + 1219, + 264, + 363, + 6345, + 44, + 1657, + 365, + 264, + 3539, + 507, + 13, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.9188976549122432, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09050824493169785, + "confidence": 0.385, + "words": [ + { + "text": "It's", + "start": 47.76, + "end": 48.22, + "confidence": 0.352 + }, + { + "text": "called", + "start": 48.22, + "end": 48.38, + "confidence": 0.216 + }, + { + "text": "the", + "start": 48.38, + "end": 48.72, + "confidence": 0.208 + }, + { + "text": "BFFM", + "start": 48.72, + "end": 49.8, + "confidence": 0.849 + }, + { + "text": "story", + "start": 49.8, + "end": 50.06, + "confidence": 0.466 + }, + { + "text": "with", + "start": 50.06, + "end": 50.22, + "confidence": 0.362 + }, + { + "text": "the", + "start": 50.22, + "end": 50.66, + "confidence": 0.179 + }, + { + "text": "actuality.", + "start": 50.66, + "end": 51.42, + "confidence": 0.322 + } + ] + }, + { + "id": 
15, + "seek": 5200, + "start": 52.0, + "end": 56.02, + "text": " There are 60 minutes of the report of the analysis of the actions you need.", + "tokens": [ + 50364, + 821, + 366, + 4060, + 2077, + 295, + 264, + 2275, + 295, + 264, + 5215, + 295, + 264, + 5909, + 291, + 643, + 13, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.49237053765190975, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2626885175704956, + "confidence": 0.3, + "words": [ + { + "text": "There", + "start": 52.0, + "end": 52.16, + "confidence": 0.088 + }, + { + "text": "are", + "start": 52.16, + "end": 52.3, + "confidence": 0.681 + }, + { + "text": "60", + "start": 52.3, + "end": 52.7, + "confidence": 0.199 + }, + { + "text": "minutes", + "start": 52.7, + "end": 53.12, + "confidence": 0.805 + }, + { + "text": "of", + "start": 53.12, + "end": 53.74, + "confidence": 0.218 + }, + { + "text": "the", + "start": 53.74, + "end": 53.86, + "confidence": 0.161 + }, + { + "text": "report", + "start": 53.86, + "end": 54.16, + "confidence": 0.074 + }, + { + "text": "of", + "start": 54.16, + "end": 54.4, + "confidence": 0.558 + }, + { + "text": "the", + "start": 54.4, + "end": 54.52, + "confidence": 0.586 + }, + { + "text": "analysis", + "start": 54.52, + "end": 54.7, + "confidence": 0.127 + }, + { + "text": "of", + "start": 54.7, + "end": 54.82, + "confidence": 0.838 + }, + { + "text": "the", + "start": 54.82, + "end": 55.22, + "confidence": 0.554 + }, + { + "text": "actions", + "start": 55.22, + "end": 55.24, + "confidence": 0.319 + }, + { + "text": "you", + "start": 55.24, + "end": 55.66, + "confidence": 0.557 + }, + { + "text": "need.", + "start": 55.66, + "end": 56.02, + "confidence": 0.164 + } + ] + }, + { + "id": 16, + "seek": 5200, + "start": 56.7, + "end": 59.8, + "text": " As you can see, it's a bit of a report of the reports of the reports.", + "tokens": [ + 50564, + 1018, + 291, + 393, + 536, + 11, + 309, + 311, + 257, + 857, + 295, + 257, + 2275, + 295, + 264, + 7122, + 
295, + 264, + 7122, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.49237053765190975, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2626885175704956, + "confidence": 0.288, + "words": [ + { + "text": "As", + "start": 56.7, + "end": 56.9, + "confidence": 0.291 + }, + { + "text": "you", + "start": 56.9, + "end": 56.96, + "confidence": 0.158 + }, + { + "text": "can", + "start": 56.96, + "end": 57.34, + "confidence": 0.268 + }, + { + "text": "see,", + "start": 57.34, + "end": 57.36, + "confidence": 0.922 + }, + { + "text": "it's", + "start": 57.64, + "end": 57.66, + "confidence": 0.309 + }, + { + "text": "a", + "start": 57.66, + "end": 57.68, + "confidence": 0.456 + }, + { + "text": "bit", + "start": 57.68, + "end": 57.7, + "confidence": 0.491 + }, + { + "text": "of", + "start": 57.7, + "end": 57.96, + "confidence": 0.106 + }, + { + "text": "a", + "start": 57.96, + "end": 58.1, + "confidence": 0.444 + }, + { + "text": "report", + "start": 58.1, + "end": 59.06, + "confidence": 0.059 + }, + { + "text": "of", + "start": 59.06, + "end": 59.32, + "confidence": 0.53 + }, + { + "text": "the", + "start": 59.32, + "end": 59.46, + "confidence": 0.709 + }, + { + "text": "reports", + "start": 59.46, + "end": 59.72, + "confidence": 0.095 + }, + { + "text": "of", + "start": 59.72, + "end": 59.76, + "confidence": 0.292 + }, + { + "text": "the", + "start": 59.76, + "end": 59.78, + "confidence": 0.744 + }, + { + "text": "reports.", + "start": 59.78, + "end": 59.8, + "confidence": 0.122 + } + ] + }, + { + "id": 17, + "seek": 5200, + "start": 59.8, + "end": 60.62, + "text": " So, what is it, guys?", + "tokens": [ + 50764, + 407, + 11, + 437, + 307, + 309, + 11, + 1074, + 30, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.49237053765190975, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2626885175704956, + "confidence": 0.234, + "words": [ + { + "text": "So,", + "start": 59.8, + "end": 60.02, + "confidence": 0.154 + }, + { + "text": 
"what", + "start": 60.18, + "end": 60.2, + "confidence": 0.266 + }, + { + "text": "is", + "start": 60.2, + "end": 60.42, + "confidence": 0.34 + }, + { + "text": "it,", + "start": 60.42, + "end": 60.58, + "confidence": 0.189 + }, + { + "text": "guys?", + "start": 60.58, + "end": 60.62, + "confidence": 0.267 + } + ] + }, + { + "id": 18, + "seek": 5200, + "start": 60.62, + "end": 62.98, + "text": " So, we are ready to do it.", + "tokens": [ + 50814, + 407, + 11, + 321, + 366, + 1919, + 281, + 360, + 309, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.49237053765190975, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2626885175704956, + "confidence": 0.392, + "words": [ + { + "text": "So,", + "start": 60.62, + "end": 62.06, + "confidence": 0.171 + }, + { + "text": "we", + "start": 62.08, + "end": 62.26, + "confidence": 0.36 + }, + { + "text": "are", + "start": 62.26, + "end": 62.44, + "confidence": 0.245 + }, + { + "text": "ready", + "start": 62.44, + "end": 62.68, + "confidence": 0.476 + }, + { + "text": "to", + "start": 62.68, + "end": 62.7, + "confidence": 0.885 + }, + { + "text": "do", + "start": 62.7, + "end": 62.92, + "confidence": 0.587 + }, + { + "text": "it.", + "start": 62.92, + "end": 62.98, + "confidence": 0.383 + } + ] + }, + { + "id": 19, + "seek": 5200, + "start": 63.0, + "end": 66.94, + "text": " The report is ready to be told by the leaders of the CFT's CFT's CFT's CFT.", + "tokens": [ + 50914, + 440, + 2275, + 307, + 1919, + 281, + 312, + 1907, + 538, + 264, + 3523, + 295, + 264, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.49237053765190975, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2626885175704956, + "confidence": 0.35, + "words": [ + { + "text": "The", + "start": 63.0, + "end": 63.44, + "confidence": 0.495 + }, + { + "text": "report", + "start": 63.44, + "end": 63.46, + "confidence": 0.389 + }, + 
{ + "text": "is", + "start": 63.46, + "end": 63.62, + "confidence": 0.524 + }, + { + "text": "ready", + "start": 63.62, + "end": 63.88, + "confidence": 0.055 + }, + { + "text": "to", + "start": 63.88, + "end": 64.26, + "confidence": 0.462 + }, + { + "text": "be", + "start": 64.26, + "end": 64.68, + "confidence": 0.209 + }, + { + "text": "told", + "start": 64.68, + "end": 64.7, + "confidence": 0.047 + }, + { + "text": "by", + "start": 64.7, + "end": 65.04, + "confidence": 0.644 + }, + { + "text": "the", + "start": 65.04, + "end": 65.08, + "confidence": 0.89 + }, + { + "text": "leaders", + "start": 65.08, + "end": 65.46, + "confidence": 0.571 + }, + { + "text": "of", + "start": 65.46, + "end": 65.94, + "confidence": 0.835 + }, + { + "text": "the", + "start": 65.94, + "end": 66.02, + "confidence": 0.739 + }, + { + "text": "CFT's", + "start": 66.02, + "end": 66.36, + "confidence": 0.098 + }, + { + "text": "CFT's", + "start": 66.36, + "end": 66.62, + "confidence": 0.406 + }, + { + "text": "CFT's", + "start": 66.62, + "end": 66.64, + "confidence": 0.51 + }, + { + "text": "CFT.", + "start": 66.64, + "end": 66.94, + "confidence": 0.875 + } + ] + }, + { + "id": 20, + "seek": 6700, + "start": 67.0, + "end": 97.0, + "text": " The report of the CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's C", + "tokens": [ + 50364, + 440, + 2275, + 295, + 264, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 
383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383 + ], + "temperature": 0.0, + "avg_logprob": -0.09770926231760615, + "compression_ratio": 13.264705882352942, + "no_speech_prob": 0.7737843990325928, + "confidence": 0.908, + "words": [ + { + "text": "The", + "start": 67.0, + "end": 67.84, + "confidence": 0.342 + }, + { + "text": "report", + "start": 67.84, + "end": 67.86, + "confidence": 0.624 + }, + { + "text": "of", + "start": 67.86, + "end": 68.16, + "confidence": 0.37 + }, + { + "text": "the", + "start": 68.16, + "end": 68.36, + "confidence": 0.867 + }, + { + "text": "CFT's", + "start": 68.36, + "end": 68.68, + "confidence": 0.859 
+ }, + { + "text": "CFT's", + "start": 68.68, + "end": 68.82, + "confidence": 0.845 + }, + { + "text": "CFT's", + "start": 68.82, + "end": 68.94, + "confidence": 0.817 + }, + { + "text": "CFT's", + "start": 68.94, + "end": 69.04, + "confidence": 0.814 + }, + { + "text": "CFT's", + "start": 69.04, + "end": 69.24, + "confidence": 0.829 + }, + { + "text": "CFT's", + "start": 69.24, + "end": 69.64, + "confidence": 0.852 + }, + { + "text": "CFT's", + "start": 69.64, + "end": 69.88, + "confidence": 0.882 + }, + { + "text": "CFT's", + "start": 69.88, + "end": 70.34, + "confidence": 0.889 + }, + { + "text": "CFT's", + "start": 70.34, + "end": 70.92, + "confidence": 0.898 + }, + { + "text": "CFT's", + "start": 70.92, + "end": 71.62, + "confidence": 0.899 + }, + { + "text": "CFT's", + "start": 71.62, + "end": 73.26, + "confidence": 0.896 + }, + { + "text": "CFT's", + "start": 73.26, + "end": 74.0, + "confidence": 0.9 + }, + { + "text": "CFT's", + "start": 74.0, + "end": 74.4, + "confidence": 0.899 + }, + { + "text": "CFT's", + "start": 74.4, + "end": 74.96, + "confidence": 0.902 + }, + { + "text": "CFT's", + "start": 74.96, + "end": 76.3, + "confidence": 0.904 + }, + { + "text": "CFT's", + "start": 76.3, + "end": 76.32, + "confidence": 0.901 + }, + { + "text": "CFT's", + "start": 76.32, + "end": 76.38, + "confidence": 0.9 + }, + { + "text": "CFT's", + "start": 76.38, + "end": 76.5, + "confidence": 0.904 + }, + { + "text": "CFT's", + "start": 76.5, + "end": 77.4, + "confidence": 0.9 + }, + { + "text": "CFT's", + "start": 77.4, + "end": 77.42, + "confidence": 0.901 + }, + { + "text": "CFT's", + "start": 77.42, + "end": 78.0, + "confidence": 0.9 + }, + { + "text": "CFT's", + "start": 78.0, + "end": 78.02, + "confidence": 0.898 + }, + { + "text": "CFT's", + "start": 78.02, + "end": 78.06, + "confidence": 0.901 + }, + { + "text": "CFT's", + "start": 78.06, + "end": 78.08, + "confidence": 0.903 + }, + { + "text": "CFT's", + "start": 78.08, + "end": 78.1, + "confidence": 0.903 + }, 
+ { + "text": "CFT's", + "start": 78.1, + "end": 78.98, + "confidence": 0.903 + }, + { + "text": "CFT's", + "start": 78.98, + "end": 79.24, + "confidence": 0.903 + }, + { + "text": "CFT's", + "start": 79.24, + "end": 81.7, + "confidence": 0.905 + }, + { + "text": "CFT's", + "start": 81.7, + "end": 81.72, + "confidence": 0.906 + }, + { + "text": "CFT's", + "start": 81.72, + "end": 81.94, + "confidence": 0.91 + }, + { + "text": "CFT's", + "start": 81.94, + "end": 82.32, + "confidence": 0.917 + }, + { + "text": "CFT's", + "start": 82.32, + "end": 82.34, + "confidence": 0.916 + }, + { + "text": "CFT's", + "start": 82.34, + "end": 82.36, + "confidence": 0.917 + }, + { + "text": "CFT's", + "start": 82.36, + "end": 82.44, + "confidence": 0.92 + }, + { + "text": "CFT's", + "start": 82.44, + "end": 82.54, + "confidence": 0.918 + }, + { + "text": "CFT's", + "start": 82.54, + "end": 82.56, + "confidence": 0.923 + }, + { + "text": "CFT's", + "start": 82.56, + "end": 82.58, + "confidence": 0.922 + }, + { + "text": "CFT's", + "start": 82.58, + "end": 82.6, + "confidence": 0.923 + }, + { + "text": "CFT's", + "start": 82.6, + "end": 82.62, + "confidence": 0.928 + }, + { + "text": "CFT's", + "start": 82.62, + "end": 82.68, + "confidence": 0.927 + }, + { + "text": "CFT's", + "start": 82.68, + "end": 83.12, + "confidence": 0.931 + }, + { + "text": "CFT's", + "start": 83.12, + "end": 83.24, + "confidence": 0.932 + }, + { + "text": "CFT's", + "start": 83.24, + "end": 83.54, + "confidence": 0.929 + }, + { + "text": "CFT's", + "start": 83.54, + "end": 83.56, + "confidence": 0.935 + }, + { + "text": "CFT's", + "start": 83.56, + "end": 83.58, + "confidence": 0.933 + }, + { + "text": "CFT's", + "start": 83.58, + "end": 83.7, + "confidence": 0.936 + }, + { + "text": "CFT's", + "start": 83.7, + "end": 83.72, + "confidence": 0.938 + }, + { + "text": "CFT's", + "start": 83.72, + "end": 83.74, + "confidence": 0.94 + }, + { + "text": "CFT's", + "start": 83.74, + "end": 83.76, + "confidence": 
0.942 + }, + { + "text": "CFT's", + "start": 83.76, + "end": 83.78, + "confidence": 0.939 + }, + { + "text": "CFT's", + "start": 83.78, + "end": 84.02, + "confidence": 0.946 + }, + { + "text": "CFT's", + "start": 84.02, + "end": 84.04, + "confidence": 0.945 + }, + { + "text": "CFT's", + "start": 84.04, + "end": 84.28, + "confidence": 0.948 + }, + { + "text": "CFT's", + "start": 84.28, + "end": 84.4, + "confidence": 0.948 + }, + { + "text": "CFT's", + "start": 84.4, + "end": 84.54, + "confidence": 0.952 + }, + { + "text": "CFT's", + "start": 84.54, + "end": 84.56, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 84.56, + "end": 84.58, + "confidence": 0.956 + }, + { + "text": "CFT's", + "start": 84.58, + "end": 85.02, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 85.02, + "end": 85.98, + "confidence": 0.955 + }, + { + "text": "CFT's", + "start": 85.98, + "end": 86.62, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 86.62, + "end": 86.8, + "confidence": 0.952 + }, + { + "text": "CFT's", + "start": 86.8, + "end": 87.28, + "confidence": 0.95 + }, + { + "text": "CFT's", + "start": 87.28, + "end": 87.58, + "confidence": 0.953 + }, + { + "text": "CFT's", + "start": 87.58, + "end": 87.66, + "confidence": 0.951 + }, + { + "text": "CFT's", + "start": 87.66, + "end": 87.68, + "confidence": 0.952 + }, + { + "text": "CFT's", + "start": 87.68, + "end": 87.7, + "confidence": 0.953 + }, + { + "text": "CFT's", + "start": 87.7, + "end": 88.84, + "confidence": 0.949 + }, + { + "text": "CFT's", + "start": 88.84, + "end": 88.86, + "confidence": 0.948 + }, + { + "text": "CFT's", + "start": 88.86, + "end": 89.2, + "confidence": 0.95 + }, + { + "text": "CFT's", + "start": 89.2, + "end": 90.42, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 90.42, + "end": 90.82, + "confidence": 0.951 + }, + { + "text": "CFT's", + "start": 90.82, + "end": 96.98, + "confidence": 0.95 + }, + { + "text": "C", + "start": 96.98, + "end": 97.0, + "confidence": 
0.974 + } + ] + }, + { + "id": 21, + "seek": 9700, + "start": 97.0, + "end": 127.0, + "text": " The CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's C", + "tokens": [ + 50364, + 440, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 
383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383 + ], + "temperature": 0.0, + "avg_logprob": -0.04616380272424809, + "compression_ratio": 21.095238095238095, + "no_speech_prob": 0.18022935092449188, + "confidence": 0.955, + "words": [ + { + "text": "The", + "start": 97.0, + "end": 97.94, + "confidence": 0.243 + }, + { + "text": "CFT's", + "start": 97.94, + "end": 97.96, + "confidence": 0.473 + }, + { + "text": "CFT's", + "start": 97.96, + "end": 99.74, + "confidence": 0.916 + }, + { + "text": "CFT's", + "start": 99.74, + "end": 102.98, + "confidence": 0.915 + }, + { + "text": "CFT's", + "start": 102.98, + "end": 104.02, + "confidence": 0.918 + }, + { + "text": "CFT's", + "start": 104.02, + "end": 104.12, + "confidence": 0.929 + }, + { + "text": "CFT's", + "start": 104.12, + "end": 104.28, + "confidence": 0.941 + }, + { + "text": "CFT's", + "start": 104.28, + "end": 104.3, + "confidence": 0.95 + }, + { + "text": "CFT's", + "start": 104.3, + "end": 104.82, + "confidence": 0.96 + }, + { + "text": "CFT's", + "start": 104.82, + "end": 105.08, + "confidence": 0.965 + }, + { + "text": "CFT's", + "start": 105.08, + "end": 105.94, + "confidence": 0.967 + }, + { + "text": "CFT's", + "start": 105.94, + "end": 106.88, + "confidence": 0.969 + }, + { + "text": "CFT's", + "start": 106.88, + "end": 108.96, + "confidence": 0.97 + }, + { + "text": "CFT's", + "start": 108.96, + "end": 108.98, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 108.98, + "end": 109.06, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 109.06, + "end": 109.7, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 109.7, + "end": 109.72, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 109.72, + "end": 110.12, + "confidence": 0.97 + }, + { + "text": "CFT's", + 
"start": 110.12, + "end": 110.38, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 110.38, + "end": 110.4, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 110.4, + "end": 110.42, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 110.42, + "end": 110.44, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 110.44, + "end": 110.84, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 110.84, + "end": 110.86, + "confidence": 0.972 + }, + { + "text": "CFT's", + "start": 110.86, + "end": 111.54, + "confidence": 0.972 + }, + { + "text": "CFT's", + "start": 111.54, + "end": 111.76, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 111.76, + "end": 111.78, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 111.78, + "end": 111.8, + "confidence": 0.972 + }, + { + "text": "CFT's", + "start": 111.8, + "end": 111.82, + "confidence": 0.972 + }, + { + "text": "CFT's", + "start": 111.82, + "end": 111.84, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 111.84, + "end": 111.86, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 111.86, + "end": 111.88, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 111.88, + "end": 111.9, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.9, + "end": 111.92, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.92, + "end": 111.94, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.94, + "end": 111.96, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.96, + "end": 111.98, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.98, + "end": 112.0, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 112.0, + "end": 112.02, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 112.02, + "end": 112.04, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 112.04, + "end": 112.58, + "confidence": 0.977 + }, + { + "text": "CFT's", + "start": 112.58, + "end": 
112.76, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 112.76, + "end": 112.78, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 112.78, + "end": 112.8, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 112.8, + "end": 112.82, + "confidence": 0.977 + }, + { + "text": "CFT's", + "start": 112.82, + "end": 112.84, + "confidence": 0.979 + }, + { + "text": "CFT's", + "start": 112.84, + "end": 112.86, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 112.86, + "end": 112.88, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 112.88, + "end": 113.2, + "confidence": 0.979 + }, + { + "text": "CFT's", + "start": 113.2, + "end": 113.22, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 113.22, + "end": 113.24, + "confidence": 0.979 + }, + { + "text": "CFT's", + "start": 113.24, + "end": 113.26, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 113.26, + "end": 114.64, + "confidence": 0.979 + }, + { + "text": "CFT's", + "start": 114.64, + "end": 114.66, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 114.66, + "end": 116.02, + "confidence": 0.977 + }, + { + "text": "CFT's", + "start": 116.02, + "end": 116.04, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 116.04, + "end": 116.06, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 116.06, + "end": 118.04, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 118.04, + "end": 118.24, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 118.24, + "end": 120.92, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 120.92, + "end": 120.94, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 120.94, + "end": 121.06, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 121.06, + "end": 122.26, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.26, + "end": 122.28, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 122.28, + "end": 122.3, + "confidence": 
0.975 + }, + { + "text": "CFT's", + "start": 122.3, + "end": 122.32, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 122.32, + "end": 122.34, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 122.34, + "end": 122.36, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.36, + "end": 122.38, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.38, + "end": 122.4, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.4, + "end": 122.42, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.42, + "end": 122.44, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.44, + "end": 122.56, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 122.56, + "end": 126.98, + "confidence": 0.972 + }, + { + "text": "C", + "start": 126.98, + "end": 127.0, + "confidence": 0.992 + } + ] + } + ], + "language": "en", + "language_probs": { + "en": 0.658899188041687, + "zh": 0.050808850675821304, + "de": 0.0033512068912386894, + "es": 0.008898504078388214, + "ru": 0.014331294223666191, + "ko": 0.011974244378507137, + "fr": 0.00918097235262394, + "ja": 0.05160897597670555, + "pt": 0.013568597845733166, + "tr": 0.0036235188599675894, + "pl": 0.0023032487370073795, + "ca": 0.00034234733902849257, + "nl": 0.005927680991590023, + "ar": 0.0007191092590801418, + "sv": 0.001765961991623044, + "it": 0.003197744255885482, + "id": 0.002267539966851473, + "hi": 0.009038634598255157, + "fi": 0.0007024512160569429, + "vi": 0.010484999977052212, + "he": 0.00012399000115692616, + "uk": 0.0007897870964370668, + "el": 0.00020362796203698963, + "ms": 0.004616482648998499, + "cs": 0.0006755407084710896, + "ro": 0.0017250536475330591, + "da": 0.0003092850965913385, + "hu": 0.0024904056917876005, + "ta": 0.008968295529484749, + "no": 0.0006103001651354134, + "th": 0.0027138900477439165, + "ur": 0.0009019651915878057, + "hr": 0.00013617637159768492, + "bg": 0.0006915606209076941, + "lt": 3.6508481571218e-05, + "la": 
0.007670989725738764, + "mi": 0.0006446054903790355, + "ml": 0.002671814989298582, + "cy": 0.0022853247355669737, + "sk": 0.00011877514043590054, + "te": 0.0032992514315992594, + "fa": 0.0002012556215049699, + "lv": 4.268274278729223e-05, + "bn": 0.00012892918311990798, + "sr": 4.40376388723962e-05, + "az": 1.3749220670433715e-05, + "sl": 0.00035046591074205935, + "kn": 0.0001377815642626956, + "et": 4.3185871618334204e-05, + "mk": 2.0883322576992214e-05, + "br": 0.0018078406574204564, + "eu": 0.00014383091183844954, + "is": 5.188901923247613e-05, + "hy": 4.561337118502706e-05, + "ne": 0.00013886220403946936, + "mn": 0.0001555182971060276, + "bs": 0.00010646934242686257, + "kk": 5.9134658840775955e-06, + "sq": 5.108455297886394e-05, + "sw": 6.019230931997299e-05, + "gl": 0.0006808390025980771, + "mr": 0.00010041015775641426, + "pa": 7.120145892258734e-05, + "si": 0.0003117108135484159, + "km": 0.0006650673458352685, + "sn": 0.00011201561574125662, + "yo": 0.0001269303320441395, + "so": 2.5433498649363173e-06, + "af": 5.766070171375759e-05, + "oc": 4.218547110212967e-05, + "ka": 1.0019926776294596e-05, + "be": 2.528868571971543e-05, + "tg": 1.1082325812594718e-07, + "sd": 0.0001377815642626956, + "gu": 0.00013095952454023063, + "am": 8.178026291716378e-06, + "yi": 5.811293885926716e-05, + "lo": 8.53708479553461e-06, + "uz": 2.208711435969235e-09, + "fo": 0.00011647781502688304, + "ht": 3.886307968059555e-05, + "ps": 1.9541581423254684e-05, + "tk": 2.696021716985797e-08, + "nn": 0.0401931069791317, + "mt": 1.3536057849705685e-05, + "sa": 0.000331814429955557, + "lb": 7.034901727820397e-08, + "my": 0.0001319866714766249, + "bo": 7.461849600076675e-05, + "tl": 0.0008407239220105112, + "mg": 2.969688495113587e-08, + "as": 1.5218998669297434e-05, + "tt": 9.219026253504126e-08, + "haw": 0.04050834849476814, + "ln": 6.832997314631939e-06, + "ha": 5.194181440515422e-08, + "ba": 2.1452621012940654e-08, + "jw": 0.0069301617331802845, + "su": 8.48571133360565e-08 + } +} \ No 
newline at end of file diff --git a/tests/expected/tiny_auto/smartphone.mp3.words.json b/tests/expected/tiny_auto/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..8699eb151d57cf019548f6b0f932b2fd8fa5f924 --- /dev/null +++ b/tests/expected/tiny_auto/smartphone.mp3.words.json @@ -0,0 +1,5192 @@ +{ + "text": " C'est évidence que dit Nicolas, mais je me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière dans quelques interagues entraîne. Et il est d'ailleurs, c'est la photo c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces les grand-attachilles à été beaucoup très souvent mentionnées. Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs nous ont appris à piquer sur des icônes, sauf que, alors le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible. Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté tout flu de la navigation web pour aller directement en but. Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas, dit qu'il est très symbolablement inédit dans l'histoire de l'humanité. Mais ça s'assoulait d'une autre interrogation. Est-ce que le fait que cette objet soit inédit un d'huits que notre rapport a lui est aussi un rapport inédit ? Est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone ? Il n'y a pas d'équivalent. On s'est espèrent de nous voter dans la relation à l'objet. C'est facilement éterricion. Parce que la passion de l'utilisateur et ses affices a dépendance, cette objet d'un lieu en fait, une espèce de relation de médiation avec le monde qui rendent encore avec la maille de celles formes de rogeur. Donc, à objets inédits, rapport inédits. 
Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépendance et de rogeur. Bon, en vrai, il faudrait remonter très, très filmant tout l'histoire des objectes techniques et de leur infertion dans nos vieux pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment. Pour autant, je sache. Il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais, la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même mort. On peut adorer sa bagnure. On a par besoin pour plein de choses. Et là, le soir, quand on va se coucher, on la laisse. On la pade en la main quand on est colis, qu'on n'a même pas au chiot. On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui. Continuellement avec son smartphone dans la main, comme c'était une sorte de estimateur extère de tomber de lâcher à l'éantrénée, ça m'a eu immédiate. Bon, je dis ça pour le mome, mais évidemment, va là, bon aussi. Donc, rapport immédiate d'accord. Mais pourquoi, à ton impression qu'on en sortira, j'amé? Et puis, il faut en remettre la faute sur les gens qui ont créé cette critique merveilleux et diabolique et diabolique par que merveilleux. Les économistes parlent de dépendance du santé. 
Ces vidéos, en fait, on est un santé qui a été étabis, un soit mon termine, en marchand dessus, soit des finissants débordes, des finissants, une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 3.66, + "text": " C'est évidence que dit Nicolas, mais je me l'étais jamais formulé comme ça.", + "tokens": [ + 50364, + 383, + 6, + 377, + 20090, + 2778, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13, + 50545 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.709, + "words": [ + { + "text": "C'est", + "start": 0.42, + "end": 0.68, + "confidence": 0.85 + }, + { + "text": "évidence", + "start": 0.68, + "end": 0.94, + "confidence": 0.368 + }, + { + "text": "que", + "start": 0.94, + "end": 1.08, + "confidence": 0.882 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.345 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.44, + "confidence": 0.921 + }, + { + "text": "mais", + "start": 1.88, + "end": 2.14, + "confidence": 0.914 + }, + { + "text": "je", + "start": 2.14, + "end": 2.26, + "confidence": 0.778 + }, + { + "text": "me", + "start": 2.26, + "end": 2.34, + "confidence": 0.954 + }, + { + "text": "l'étais", + "start": 2.34, + "end": 2.58, + "confidence": 0.719 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.86, + "confidence": 0.946 + }, + { + "text": "formulé", + "start": 2.86, + "end": 3.26, + "confidence": 0.529 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.46, + "confidence": 0.968 + }, + { + "text": "ça.", + "start": 3.46, + "end": 3.66, + "confidence": 0.96 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.14, + "end": 8.9, + "text": " Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière", + "tokens": [ + 50545, + 8257, + 1956, + 3887, + 635, + 
3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 635, + 12713, + 2776, + 730, + 17290, + 3916, + 11, + 2420, + 635, + 22267, + 50806 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.789, + "words": [ + { + "text": "Ce", + "start": 4.14, + "end": 4.26, + "confidence": 0.394 + }, + { + "text": "qui", + "start": 4.26, + "end": 4.38, + "confidence": 0.939 + }, + { + "text": "fait", + "start": 4.38, + "end": 4.56, + "confidence": 0.731 + }, + { + "text": "la", + "start": 4.56, + "end": 4.72, + "confidence": 0.988 + }, + { + "text": "force", + "start": 4.72, + "end": 5.02, + "confidence": 0.93 + }, + { + "text": "du", + "start": 5.02, + "end": 5.2, + "confidence": 0.937 + }, + { + "text": "smartphone,", + "start": 5.2, + "end": 5.58, + "confidence": 0.908 + }, + { + "text": "c'est", + "start": 5.9, + "end": 6.2, + "confidence": 0.948 + }, + { + "text": "pas", + "start": 6.2, + "end": 6.26, + "confidence": 0.983 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.6, + "confidence": 0.993 + }, + { + "text": "la", + "start": 6.6, + "end": 6.8, + "confidence": 0.636 + }, + { + "text": "cumulation", + "start": 6.8, + "end": 7.34, + "confidence": 0.691 + }, + { + "text": "des", + "start": 7.34, + "end": 7.56, + "confidence": 0.793 + }, + { + "text": "fonctions,", + "start": 7.56, + "end": 8.14, + "confidence": 0.832 + }, + { + "text": "mais", + "start": 8.38, + "end": 8.5, + "confidence": 0.669 + }, + { + "text": "la", + "start": 8.5, + "end": 8.62, + "confidence": 0.718 + }, + { + "text": "manière", + "start": 8.62, + "end": 8.9, + "confidence": 0.498 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.9, + "end": 10.98, + "text": " dans quelques interagues entraîne.", + "tokens": [ + 50806, + 2680, + 16597, + 728, + 559, + 1247, + 22284, + 24741, + 13, + 50906 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + 
"compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.303, + "words": [ + { + "text": "dans", + "start": 8.9, + "end": 9.06, + "confidence": 0.283 + }, + { + "text": "quelques", + "start": 9.06, + "end": 9.28, + "confidence": 0.281 + }, + { + "text": "interagues", + "start": 9.28, + "end": 10.38, + "confidence": 0.245 + }, + { + "text": "entraîne.", + "start": 10.38, + "end": 10.98, + "confidence": 0.446 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 11.0, + "end": 12.96, + "text": " Et il est d'ailleurs, c'est la photo c'est hyper convaincant.", + "tokens": [ + 50906, + 3790, + 1930, + 871, + 274, + 6, + 19400, + 11, + 269, + 6, + 377, + 635, + 5052, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13, + 51006 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.63, + "words": [ + { + "text": "Et", + "start": 11.0, + "end": 11.12, + "confidence": 0.362 + }, + { + "text": "il", + "start": 11.12, + "end": 11.28, + "confidence": 0.144 + }, + { + "text": "est", + "start": 11.28, + "end": 11.38, + "confidence": 0.24 + }, + { + "text": "d'ailleurs,", + "start": 11.38, + "end": 11.6, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 11.7, + "end": 11.78, + "confidence": 0.886 + }, + { + "text": "la", + "start": 11.78, + "end": 11.8, + "confidence": 0.969 + }, + { + "text": "photo", + "start": 11.8, + "end": 12.02, + "confidence": 0.809 + }, + { + "text": "c'est", + "start": 12.02, + "end": 12.26, + "confidence": 0.784 + }, + { + "text": "hyper", + "start": 12.26, + "end": 12.46, + "confidence": 0.938 + }, + { + "text": "convaincant.", + "start": 12.46, + "end": 12.96, + "confidence": 0.509 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.3, + "end": 18.8, + "text": " Alors évidemment, il faudrait ajouter les interfaces les grand-attachilles à été beaucoup très souvent", + "tokens": [ 
+ 51006, + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 1512, + 2697, + 12, + 1591, + 608, + 14835, + 1531, + 8862, + 8796, + 5732, + 20847, + 51306 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.533, + "words": [ + { + "text": "Alors", + "start": 13.3, + "end": 13.56, + "confidence": 0.894 + }, + { + "text": "évidemment,", + "start": 13.56, + "end": 13.82, + "confidence": 0.777 + }, + { + "text": "il", + "start": 14.38, + "end": 14.4, + "confidence": 0.964 + }, + { + "text": "faudrait", + "start": 14.4, + "end": 14.76, + "confidence": 0.855 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.38, + "confidence": 0.883 + }, + { + "text": "les", + "start": 15.38, + "end": 15.6, + "confidence": 0.935 + }, + { + "text": "interfaces", + "start": 15.6, + "end": 16.0, + "confidence": 0.381 + }, + { + "text": "les", + "start": 16.0, + "end": 16.5, + "confidence": 0.465 + }, + { + "text": "grand-attachilles", + "start": 16.5, + "end": 17.16, + "confidence": 0.214 + }, + { + "text": "à", + "start": 17.16, + "end": 17.32, + "confidence": 0.408 + }, + { + "text": "été", + "start": 17.32, + "end": 17.72, + "confidence": 0.894 + }, + { + "text": "beaucoup", + "start": 17.72, + "end": 18.28, + "confidence": 0.798 + }, + { + "text": "très", + "start": 18.28, + "end": 18.64, + "confidence": 0.493 + }, + { + "text": "souvent", + "start": 18.64, + "end": 18.8, + "confidence": 0.823 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 18.8, + "end": 19.84, + "text": " mentionnées.", + "tokens": [ + 51306, + 2152, + 77, + 6836, + 13, + 51356 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.626, + "words": [ + { + "text": "mentionnées.", + "start": 18.8, + "end": 19.84, + "confidence": 0.626 + } + ] + }, + { 
+ "id": 6, + "seek": 0, + "start": 20.02, + "end": 23.58, + "text": " Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs", + "tokens": [ + 51356, + 6313, + 4428, + 11, + 1930, + 38694, + 8645, + 631, + 1512, + 1740, + 3324, + 6212, + 368, + 945, + 1567, + 17338, + 287, + 6, + 21210, + 11, + 1512, + 4792, + 13923, + 2156, + 51543 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.658, + "words": [ + { + "text": "Mais", + "start": 20.02, + "end": 20.26, + "confidence": 0.979 + }, + { + "text": "bon,", + "start": 20.26, + "end": 20.46, + "confidence": 0.566 + }, + { + "text": "il", + "start": 20.6, + "end": 20.68, + "confidence": 0.975 + }, + { + "text": "faudrait", + "start": 20.68, + "end": 20.78, + "confidence": 0.773 + }, + { + "text": "que", + "start": 20.78, + "end": 20.92, + "confidence": 0.386 + }, + { + "text": "les", + "start": 20.92, + "end": 20.96, + "confidence": 0.221 + }, + { + "text": "profites", + "start": 20.96, + "end": 21.36, + "confidence": 0.568 + }, + { + "text": "aussi", + "start": 21.36, + "end": 21.72, + "confidence": 0.521 + }, + { + "text": "de", + "start": 21.72, + "end": 21.92, + "confidence": 0.489 + }, + { + "text": "20", + "start": 21.92, + "end": 22.14, + "confidence": 0.915 + }, + { + "text": "ans", + "start": 22.14, + "end": 22.32, + "confidence": 0.942 + }, + { + "text": "pendant", + "start": 22.32, + "end": 22.52, + "confidence": 0.915 + }, + { + "text": "l'été,", + "start": 22.52, + "end": 22.86, + "confidence": 0.482 + }, + { + "text": "les", + "start": 22.98, + "end": 23.1, + "confidence": 0.903 + }, + { + "text": "ordinateurs", + "start": 23.1, + "end": 23.58, + "confidence": 0.946 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 23.58, + "end": 28.07, + "text": " nous ont appris à piquer sur des icônes, sauf que, alors le smartphone ajoute le toucher,", + "tokens": [ + 
51543, + 4666, + 6592, + 724, + 5714, + 1531, + 280, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 11, + 601, + 2947, + 631, + 11, + 11246, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 51766 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.656, + "words": [ + { + "text": "nous", + "start": 23.58, + "end": 23.78, + "confidence": 0.766 + }, + { + "text": "ont", + "start": 23.78, + "end": 23.9, + "confidence": 0.981 + }, + { + "text": "appris", + "start": 23.9, + "end": 24.12, + "confidence": 0.952 + }, + { + "text": "à", + "start": 24.12, + "end": 24.26, + "confidence": 0.341 + }, + { + "text": "piquer", + "start": 24.26, + "end": 24.54, + "confidence": 0.446 + }, + { + "text": "sur", + "start": 24.54, + "end": 24.72, + "confidence": 0.816 + }, + { + "text": "des", + "start": 24.72, + "end": 24.9, + "confidence": 0.96 + }, + { + "text": "icônes,", + "start": 24.9, + "end": 25.56, + "confidence": 0.599 + }, + { + "text": "sauf", + "start": 25.64, + "end": 25.8, + "confidence": 0.521 + }, + { + "text": "que,", + "start": 25.8, + "end": 26.36, + "confidence": 0.915 + }, + { + "text": "alors", + "start": 26.36, + "end": 26.58, + "confidence": 0.399 + }, + { + "text": "le", + "start": 26.58, + "end": 26.72, + "confidence": 0.824 + }, + { + "text": "smartphone", + "start": 26.72, + "end": 27.0, + "confidence": 0.977 + }, + { + "text": "ajoute", + "start": 27.0, + "end": 27.5, + "confidence": 0.673 + }, + { + "text": "le", + "start": 27.5, + "end": 27.62, + "confidence": 0.554 + }, + { + "text": "toucher,", + "start": 27.62, + "end": 28.07, + "confidence": 0.631 + } + ] + }, + { + "id": 8, + "seek": 2804, + "start": 28.07, + "end": 30.6, + "text": " qui rend le contact plus direct, plus sensible.", + "tokens": [ + 50364, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13, + 50496 + ], + "temperature": 0.0, + 
"avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.741, + "words": [ + { + "text": "qui", + "start": 28.07, + "end": 28.26, + "confidence": 0.304 + }, + { + "text": "rend", + "start": 28.26, + "end": 28.5, + "confidence": 0.762 + }, + { + "text": "le", + "start": 28.5, + "end": 28.72, + "confidence": 0.988 + }, + { + "text": "contact", + "start": 28.72, + "end": 29.06, + "confidence": 0.817 + }, + { + "text": "plus", + "start": 29.06, + "end": 29.48, + "confidence": 0.841 + }, + { + "text": "direct,", + "start": 29.48, + "end": 30.02, + "confidence": 0.945 + }, + { + "text": "plus", + "start": 30.18, + "end": 30.24, + "confidence": 0.992 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.6, + "confidence": 0.618 + } + ] + }, + { + "id": 9, + "seek": 2804, + "start": 31.1, + "end": 34.76, + "text": " Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté", + "tokens": [ + 50496, + 3790, + 9093, + 11, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 50698 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.775, + "words": [ + { + "text": "Et", + "start": 31.1, + "end": 31.24, + "confidence": 0.949 + }, + { + "text": "puis,", + "start": 31.24, + "end": 31.36, + "confidence": 0.725 + }, + { + "text": "évidemment,", + "start": 31.42, + "end": 31.62, + "confidence": 0.211 + }, + { + "text": "il", + "start": 31.7, + "end": 31.76, + "confidence": 0.959 + }, + { + "text": "faudrait", + "start": 31.76, + "end": 31.94, + "confidence": 0.994 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.14, + "confidence": 0.847 + }, + { + "text": "aussi", + "start": 32.14, + "end": 32.36, + "confidence": 0.924 + }, + { + "text": "des", + 
"start": 32.36, + "end": 32.46, + "confidence": 0.921 + }, + { + "text": "applications", + "start": 32.46, + "end": 32.88, + "confidence": 0.856 + }, + { + "text": "qui", + "start": 32.88, + "end": 33.2, + "confidence": 0.66 + }, + { + "text": "permettent", + "start": 33.2, + "end": 33.8, + "confidence": 0.952 + }, + { + "text": "de", + "start": 33.8, + "end": 33.96, + "confidence": 0.951 + }, + { + "text": "contourner", + "start": 33.96, + "end": 34.4, + "confidence": 0.72 + }, + { + "text": "le", + "start": 34.4, + "end": 34.52, + "confidence": 0.645 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.76, + "confidence": 0.63 + } + ] + }, + { + "id": 10, + "seek": 2804, + "start": 34.8, + "end": 37.86, + "text": " tout flu de la navigation web pour aller directement en but.", + "tokens": [ + 50698, + 3486, + 5029, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 465, + 457, + 13, + 50860 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.735, + "words": [ + { + "text": "tout", + "start": 34.8, + "end": 35.04, + "confidence": 0.946 + }, + { + "text": "flu", + "start": 35.04, + "end": 35.32, + "confidence": 0.488 + }, + { + "text": "de", + "start": 35.32, + "end": 35.64, + "confidence": 0.345 + }, + { + "text": "la", + "start": 35.64, + "end": 35.78, + "confidence": 0.922 + }, + { + "text": "navigation", + "start": 35.78, + "end": 36.24, + "confidence": 0.903 + }, + { + "text": "web", + "start": 36.24, + "end": 36.64, + "confidence": 0.912 + }, + { + "text": "pour", + "start": 36.64, + "end": 36.84, + "confidence": 0.571 + }, + { + "text": "aller", + "start": 36.84, + "end": 37.06, + "confidence": 0.991 + }, + { + "text": "directement", + "start": 37.06, + "end": 37.48, + "confidence": 0.981 + }, + { + "text": "en", + "start": 37.48, + "end": 37.7, + "confidence": 0.656 + }, + { + "text": "but.", + "start": 37.7, + "end": 37.86, + 
"confidence": 0.768 + } + ] + }, + { + "id": 11, + "seek": 2804, + "start": 38.78, + "end": 43.13, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas,", + "tokens": [ + 50860, + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 5550, + 14964, + 11, + 465, + 38268, + 11, + 51121 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.738, + "words": [ + { + "text": "Bref,", + "start": 38.78, + "end": 38.8, + "confidence": 0.972 + }, + { + "text": "tout", + "start": 38.84, + "end": 39.04, + "confidence": 0.816 + }, + { + "text": "ça,", + "start": 39.04, + "end": 39.46, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 39.54, + "end": 39.76, + "confidence": 0.905 + }, + { + "text": "sont", + "start": 39.76, + "end": 39.96, + "confidence": 0.981 + }, + { + "text": "les", + "start": 39.96, + "end": 40.1, + "confidence": 0.982 + }, + { + "text": "conditions", + "start": 40.1, + "end": 40.64, + "confidence": 0.957 + }, + { + "text": "qui", + "start": 40.64, + "end": 40.96, + "confidence": 0.995 + }, + { + "text": "permettent", + "start": 40.96, + "end": 41.58, + "confidence": 0.993 + }, + { + "text": "de", + "start": 41.58, + "end": 41.64, + "confidence": 0.993 + }, + { + "text": "créer", + "start": 41.64, + "end": 42.08, + "confidence": 0.909 + }, + { + "text": "cette", + "start": 42.08, + "end": 42.34, + "confidence": 0.477 + }, + { + "text": "objet,", + "start": 42.34, + "end": 42.64, + "confidence": 0.237 + }, + { + "text": "en", + "start": 42.7, + "end": 42.84, + "confidence": 0.22 + }, + { + "text": "Nicolas,", + "start": 42.84, + "end": 43.13, + "confidence": 0.539 + } + ] + }, + { + "id": 12, + "seek": 2804, + "start": 43.13, + "end": 46.58, + "text": " dit qu'il est très symbolablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 
51121, + 6176, + 421, + 6, + 388, + 871, + 5732, + 5986, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13, + 51290 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.717, + "words": [ + { + "text": "dit", + "start": 43.13, + "end": 43.56, + "confidence": 0.692 + }, + { + "text": "qu'il", + "start": 43.56, + "end": 43.76, + "confidence": 0.957 + }, + { + "text": "est", + "start": 43.76, + "end": 43.9, + "confidence": 0.848 + }, + { + "text": "très", + "start": 43.9, + "end": 44.08, + "confidence": 0.445 + }, + { + "text": "symbolablement", + "start": 44.08, + "end": 44.86, + "confidence": 0.321 + }, + { + "text": "inédit", + "start": 44.86, + "end": 45.54, + "confidence": 0.655 + }, + { + "text": "dans", + "start": 45.54, + "end": 45.74, + "confidence": 0.817 + }, + { + "text": "l'histoire", + "start": 45.74, + "end": 46.02, + "confidence": 0.825 + }, + { + "text": "de", + "start": 46.02, + "end": 46.14, + "confidence": 0.984 + }, + { + "text": "l'humanité.", + "start": 46.14, + "end": 46.58, + "confidence": 0.99 + } + ] + }, + { + "id": 13, + "seek": 2804, + "start": 47.06, + "end": 48.76, + "text": " Mais ça s'assoulait d'une autre interrogation.", + "tokens": [ + 51290, + 6313, + 2788, + 262, + 6, + 640, + 263, + 35235, + 274, + 6, + 2613, + 15081, + 24871, + 399, + 13, + 51402 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.718, + "words": [ + { + "text": "Mais", + "start": 47.06, + "end": 47.24, + "confidence": 0.928 + }, + { + "text": "ça", + "start": 47.24, + "end": 47.48, + "confidence": 0.75 + }, + { + "text": "s'assoulait", + "start": 47.48, + "end": 47.84, + "confidence": 0.616 + }, + { + "text": "d'une", + "start": 47.84, + "end": 48.08, + "confidence": 0.675 + }, + { + 
"text": "autre", + "start": 48.08, + "end": 48.18, + "confidence": 0.964 + }, + { + "text": "interrogation.", + "start": 48.18, + "end": 48.76, + "confidence": 0.855 + } + ] + }, + { + "id": 14, + "seek": 2804, + "start": 49.42, + "end": 54.93, + "text": " Est-ce que le fait que cette objet soit inédit un d'huits que notre rapport a lui est aussi un rapport", + "tokens": [ + 51402, + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 5550, + 14964, + 12703, + 294, + 7811, + 270, + 517, + 274, + 6, + 12086, + 1208, + 631, + 10349, + 18018, + 257, + 8783, + 871, + 6212, + 517, + 18018, + 51710 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.704, + "words": [ + { + "text": "Est-ce", + "start": 49.42, + "end": 49.7, + "confidence": 0.919 + }, + { + "text": "que", + "start": 49.7, + "end": 49.74, + "confidence": 0.989 + }, + { + "text": "le", + "start": 49.74, + "end": 49.82, + "confidence": 0.872 + }, + { + "text": "fait", + "start": 49.82, + "end": 50.02, + "confidence": 0.946 + }, + { + "text": "que", + "start": 50.02, + "end": 50.16, + "confidence": 0.922 + }, + { + "text": "cette", + "start": 50.16, + "end": 50.3, + "confidence": 0.944 + }, + { + "text": "objet", + "start": 50.3, + "end": 50.64, + "confidence": 0.963 + }, + { + "text": "soit", + "start": 50.64, + "end": 51.1, + "confidence": 0.99 + }, + { + "text": "inédit", + "start": 51.1, + "end": 51.82, + "confidence": 0.928 + }, + { + "text": "un", + "start": 51.82, + "end": 52.08, + "confidence": 0.514 + }, + { + "text": "d'huits", + "start": 52.08, + "end": 52.34, + "confidence": 0.246 + }, + { + "text": "que", + "start": 52.34, + "end": 52.44, + "confidence": 0.974 + }, + { + "text": "notre", + "start": 52.44, + "end": 52.66, + "confidence": 0.992 + }, + { + "text": "rapport", + "start": 52.66, + "end": 53.24, + "confidence": 0.778 + }, + { + "text": "a", + "start": 53.24, + "end": 53.5, + 
"confidence": 0.572 + }, + { + "text": "lui", + "start": 53.5, + "end": 53.7, + "confidence": 0.698 + }, + { + "text": "est", + "start": 53.7, + "end": 54.04, + "confidence": 0.88 + }, + { + "text": "aussi", + "start": 54.04, + "end": 54.52, + "confidence": 0.814 + }, + { + "text": "un", + "start": 54.52, + "end": 54.7, + "confidence": 0.48 + }, + { + "text": "rapport", + "start": 54.7, + "end": 54.93, + "confidence": 0.951 + } + ] + }, + { + "id": 15, + "seek": 2804, + "start": 54.93, + "end": 55.88, + "text": " inédit ?", + "tokens": [ + 51710, + 294, + 7811, + 270, + 2506, + 51760 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.86, + "words": [ + { + "text": "inédit ?", + "start": 54.93, + "end": 55.88, + "confidence": 0.86 + } + ] + }, + { + "id": 16, + "seek": 5596, + "start": 55.96, + "end": 59.36, + "text": " Est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait", + "tokens": [ + 50364, + 4410, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 6195, + 368, + 6901, + 297, + 6, + 377, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 50530 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.62, + "words": [ + { + "text": "Est-ce", + "start": 55.96, + "end": 56.38, + "confidence": 0.55 + }, + { + "text": "que", + "start": 56.38, + "end": 56.44, + "confidence": 0.892 + }, + { + "text": "le", + "start": 56.44, + "end": 56.58, + "confidence": 0.982 + }, + { + "text": "rapport", + "start": 56.58, + "end": 56.9, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 56.9, + "end": 57.14, + "confidence": 0.904 + }, + { + "text": "a", + "start": 57.14, + "end": 57.2, + "confidence": 0.959 + }, + { + "text": "au", + "start": 57.2, + "end": 57.28, + 
"confidence": 0.272 + }, + { + "text": "sein", + "start": 57.28, + "end": 57.42, + "confidence": 0.17 + }, + { + "text": "de", + "start": 57.42, + "end": 57.56, + "confidence": 0.23 + }, + { + "text": "foi", + "start": 57.56, + "end": 57.58, + "confidence": 0.121 + }, + { + "text": "n'est", + "start": 57.58, + "end": 57.78, + "confidence": 0.79 + }, + { + "text": "comparable", + "start": 57.78, + "end": 58.28, + "confidence": 0.547 + }, + { + "text": "à", + "start": 58.28, + "end": 58.48, + "confidence": 0.934 + }, + { + "text": "celui", + "start": 58.48, + "end": 58.7, + "confidence": 0.851 + }, + { + "text": "qu'on", + "start": 58.7, + "end": 58.94, + "confidence": 0.926 + }, + { + "text": "entretenait", + "start": 58.94, + "end": 59.36, + "confidence": 0.56 + } + ] + }, + { + "id": 17, + "seek": 5596, + "start": 59.36, + "end": 63.14, + "text": " à d'autres objectes techniques comme la voiture ou le téléphone ?", + "tokens": [ + 50530, + 1531, + 274, + 6, + 16752, + 2657, + 279, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.757, + "words": [ + { + "text": "à", + "start": 59.36, + "end": 59.44, + "confidence": 0.951 + }, + { + "text": "d'autres", + "start": 59.44, + "end": 59.68, + "confidence": 0.857 + }, + { + "text": "objectes", + "start": 59.68, + "end": 60.02, + "confidence": 0.507 + }, + { + "text": "techniques", + "start": 60.02, + "end": 60.44, + "confidence": 0.463 + }, + { + "text": "comme", + "start": 60.44, + "end": 61.04, + "confidence": 0.639 + }, + { + "text": "la", + "start": 61.04, + "end": 61.52, + "confidence": 0.907 + }, + { + "text": "voiture", + "start": 61.52, + "end": 61.86, + "confidence": 0.947 + }, + { + "text": "ou", + "start": 61.86, + "end": 62.46, + "confidence": 0.758 + }, + { + "text": "le", + "start": 62.46, + "end": 62.68, + "confidence": 
0.912 + }, + { + "text": "téléphone ?", + "start": 62.68, + "end": 63.14, + "confidence": 0.983 + } + ] + }, + { + "id": 18, + "seek": 5596, + "start": 65.4, + "end": 66.16, + "text": " Il n'y a pas d'équivalent.", + "tokens": [ + 50714, + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 13, + 50872 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.881, + "words": [ + { + "text": "Il", + "start": 65.4, + "end": 65.5, + "confidence": 0.901 + }, + { + "text": "n'y", + "start": 65.5, + "end": 65.54, + "confidence": 0.859 + }, + { + "text": "a", + "start": 65.54, + "end": 65.56, + "confidence": 0.962 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.68, + "confidence": 0.998 + }, + { + "text": "d'équivalent.", + "start": 65.68, + "end": 66.16, + "confidence": 0.853 + } + ] + }, + { + "id": 19, + "seek": 5596, + "start": 66.16, + "end": 69.92, + "text": " On s'est espèrent de nous voter dans la relation à l'objet.", + "tokens": [ + 50872, + 1282, + 262, + 6, + 377, + 7089, + 1462, + 1753, + 368, + 4666, + 21722, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 13, + 51058 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.501, + "words": [ + { + "text": "On", + "start": 66.16, + "end": 67.08, + "confidence": 0.301 + }, + { + "text": "s'est", + "start": 67.08, + "end": 67.3, + "confidence": 0.561 + }, + { + "text": "espèrent", + "start": 67.3, + "end": 67.62, + "confidence": 0.134 + }, + { + "text": "de", + "start": 67.62, + "end": 67.66, + "confidence": 0.99 + }, + { + "text": "nous", + "start": 67.66, + "end": 67.84, + "confidence": 0.763 + }, + { + "text": "voter", + "start": 67.84, + "end": 68.36, + "confidence": 0.183 + }, + { + "text": "dans", + "start": 68.36, + "end": 68.86, + "confidence": 
0.82 + }, + { + "text": "la", + "start": 68.86, + "end": 68.96, + "confidence": 0.614 + }, + { + "text": "relation", + "start": 68.96, + "end": 69.24, + "confidence": 0.949 + }, + { + "text": "à", + "start": 69.24, + "end": 69.42, + "confidence": 0.75 + }, + { + "text": "l'objet.", + "start": 69.42, + "end": 69.92, + "confidence": 0.887 + } + ] + }, + { + "id": 20, + "seek": 5596, + "start": 70.2, + "end": 71.22, + "text": " C'est facilement éterricion.", + "tokens": [ + 51058, + 383, + 6, + 377, + 23670, + 518, + 1136, + 391, + 1341, + 313, + 13, + 51122 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.443, + "words": [ + { + "text": "C'est", + "start": 70.2, + "end": 70.34, + "confidence": 0.896 + }, + { + "text": "facilement", + "start": 70.34, + "end": 70.7, + "confidence": 0.566 + }, + { + "text": "éterricion.", + "start": 70.7, + "end": 71.22, + "confidence": 0.231 + } + ] + }, + { + "id": 21, + "seek": 5596, + "start": 71.64, + "end": 76.97, + "text": " Parce que la passion de l'utilisateur et ses affices a dépendance, cette objet d'un lieu", + "tokens": [ + 51122, + 20429, + 631, + 635, + 5418, + 368, + 287, + 6, + 20835, + 271, + 15540, + 1030, + 5385, + 2096, + 1473, + 257, + 45768, + 719, + 11, + 5550, + 14964, + 274, + 6, + 409, + 26036, + 51416 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.457, + "words": [ + { + "text": "Parce", + "start": 71.64, + "end": 71.94, + "confidence": 0.68 + }, + { + "text": "que", + "start": 71.94, + "end": 72.32, + "confidence": 0.566 + }, + { + "text": "la", + "start": 72.32, + "end": 72.56, + "confidence": 0.333 + }, + { + "text": "passion", + "start": 72.56, + "end": 72.9, + "confidence": 0.266 + }, + { + "text": "de", + "start": 72.9, + "end": 73.26, + "confidence": 0.365 + }, + { + 
"text": "l'utilisateur", + "start": 73.26, + "end": 74.82, + "confidence": 0.5 + }, + { + "text": "et", + "start": 74.82, + "end": 74.92, + "confidence": 0.452 + }, + { + "text": "ses", + "start": 74.92, + "end": 75.04, + "confidence": 0.095 + }, + { + "text": "affices", + "start": 75.04, + "end": 75.24, + "confidence": 0.302 + }, + { + "text": "a", + "start": 75.24, + "end": 75.38, + "confidence": 0.589 + }, + { + "text": "dépendance,", + "start": 75.38, + "end": 76.06, + "confidence": 0.561 + }, + { + "text": "cette", + "start": 76.32, + "end": 76.34, + "confidence": 0.399 + }, + { + "text": "objet", + "start": 76.34, + "end": 76.54, + "confidence": 0.924 + }, + { + "text": "d'un", + "start": 76.54, + "end": 76.92, + "confidence": 0.781 + }, + { + "text": "lieu", + "start": 76.92, + "end": 76.97, + "confidence": 0.305 + } + ] + }, + { + "id": 22, + "seek": 5596, + "start": 76.97, + "end": 82.15, + "text": " en fait, une espèce de relation de médiation avec le monde qui rendent encore avec", + "tokens": [ + 51416, + 465, + 3887, + 11, + 2251, + 7089, + 30236, + 368, + 9721, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 317, + 10122, + 4163, + 51671 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.746, + "words": [ + { + "text": "en", + "start": 76.97, + "end": 77.2, + "confidence": 0.77 + }, + { + "text": "fait,", + "start": 77.2, + "end": 77.32, + "confidence": 0.944 + }, + { + "text": "une", + "start": 77.46, + "end": 77.48, + "confidence": 0.699 + }, + { + "text": "espèce", + "start": 77.48, + "end": 77.9, + "confidence": 0.97 + }, + { + "text": "de", + "start": 77.9, + "end": 78.08, + "confidence": 0.993 + }, + { + "text": "relation", + "start": 78.08, + "end": 78.5, + "confidence": 0.827 + }, + { + "text": "de", + "start": 78.5, + "end": 78.94, + "confidence": 0.714 + }, + { + "text": "médiation", + "start": 78.94, + "end": 
79.5, + "confidence": 0.88 + }, + { + "text": "avec", + "start": 79.5, + "end": 79.74, + "confidence": 0.968 + }, + { + "text": "le", + "start": 79.74, + "end": 79.94, + "confidence": 0.986 + }, + { + "text": "monde", + "start": 79.94, + "end": 80.44, + "confidence": 0.906 + }, + { + "text": "qui", + "start": 80.44, + "end": 81.14, + "confidence": 0.823 + }, + { + "text": "rendent", + "start": 81.14, + "end": 81.8, + "confidence": 0.586 + }, + { + "text": "encore", + "start": 81.8, + "end": 81.98, + "confidence": 0.198 + }, + { + "text": "avec", + "start": 81.98, + "end": 82.15, + "confidence": 0.491 + } + ] + }, + { + "id": 23, + "seek": 8210, + "start": 82.15, + "end": 83.44, + "text": " la maille de celles formes de rogeur.", + "tokens": [ + 50364, + 635, + 463, + 3409, + 368, + 2815, + 279, + 1254, + 279, + 368, + 744, + 432, + 374, + 13, + 50440 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.465, + "words": [ + { + "text": "la", + "start": 82.15, + "end": 82.22, + "confidence": 0.647 + }, + { + "text": "maille", + "start": 82.22, + "end": 82.4, + "confidence": 0.136 + }, + { + "text": "de", + "start": 82.4, + "end": 82.48, + "confidence": 0.588 + }, + { + "text": "celles", + "start": 82.48, + "end": 82.74, + "confidence": 0.561 + }, + { + "text": "formes", + "start": 82.74, + "end": 82.96, + "confidence": 0.778 + }, + { + "text": "de", + "start": 82.96, + "end": 83.0, + "confidence": 0.936 + }, + { + "text": "rogeur.", + "start": 83.0, + "end": 83.44, + "confidence": 0.433 + } + ] + }, + { + "id": 24, + "seek": 8210, + "start": 83.98, + "end": 87.86, + "text": " Donc, à objets inédits, rapport inédits.", + "tokens": [ + 50440, + 7477, + 11, + 1531, + 1111, + 25349, + 294, + 7811, + 1208, + 11, + 18018, + 294, + 7811, + 1208, + 13, + 50640 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 
1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.744, + "words": [ + { + "text": "Donc,", + "start": 83.98, + "end": 84.46, + "confidence": 0.831 + }, + { + "text": "à", + "start": 84.56, + "end": 84.96, + "confidence": 0.699 + }, + { + "text": "objets", + "start": 84.96, + "end": 85.44, + "confidence": 0.525 + }, + { + "text": "inédits,", + "start": 85.44, + "end": 86.24, + "confidence": 0.7 + }, + { + "text": "rapport", + "start": 86.32, + "end": 86.92, + "confidence": 0.945 + }, + { + "text": "inédits.", + "start": 86.92, + "end": 87.86, + "confidence": 0.908 + } + ] + }, + { + "id": 25, + "seek": 8210, + "start": 88.1, + "end": 94.2, + "text": " Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépendance", + "tokens": [ + 50640, + 3790, + 11, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 46750, + 38268, + 11, + 431, + 4212, + 1032, + 578, + 4198, + 50027, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 50972 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.669, + "words": [ + { + "text": "Et,", + "start": 88.1, + "end": 88.3, + "confidence": 0.865 + }, + { + "text": "ce", + "start": 88.58, + "end": 88.84, + "confidence": 0.278 + }, + { + "text": "rapport,", + "start": 88.84, + "end": 89.28, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.38, + "end": 89.56, + "confidence": 0.933 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.74, + "confidence": 0.781 + }, + { + "text": "prends", + "start": 89.74, + "end": 89.92, + "confidence": 0.304 + }, + { + "text": "Nicolas,", + "start": 89.92, + "end": 90.22, + "confidence": 0.652 + }, + { + "text": "frère", + "start": 90.78, + "end": 91.1, + "confidence": 0.432 + }, + { + "text": "caractérisée", + "start": 91.1, + "end": 91.8, + "confidence": 0.595 + }, + { + "text": "par", + "start": 91.8, + "end": 92.14, + 
"confidence": 0.907 + }, + { + "text": "un", + "start": 92.14, + "end": 92.34, + "confidence": 0.989 + }, + { + "text": "mélange", + "start": 92.34, + "end": 92.98, + "confidence": 0.921 + }, + { + "text": "de", + "start": 92.98, + "end": 93.24, + "confidence": 0.841 + }, + { + "text": "dépendance", + "start": 93.24, + "end": 94.2, + "confidence": 0.674 + } + ] + }, + { + "id": 26, + "seek": 8210, + "start": 94.36, + "end": 95.08, + "text": " et de rogeur.", + "tokens": [ + 50972, + 1030, + 368, + 744, + 432, + 374, + 13, + 51022 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.85, + "words": [ + { + "text": "et", + "start": 94.36, + "end": 94.52, + "confidence": 0.97 + }, + { + "text": "de", + "start": 94.52, + "end": 94.64, + "confidence": 0.996 + }, + { + "text": "rogeur.", + "start": 94.64, + "end": 95.08, + "confidence": 0.771 + } + ] + }, + { + "id": 27, + "seek": 8210, + "start": 96.3, + "end": 100.48, + "text": " Bon, en vrai, il faudrait remonter très, très filmant tout l'histoire des objectes", + "tokens": [ + 51022, + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 11, + 5732, + 2007, + 394, + 3486, + 287, + 6, + 29093, + 730, + 2657, + 279, + 51286 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.752, + "words": [ + { + "text": "Bon,", + "start": 96.3, + "end": 96.32, + "confidence": 0.601 + }, + { + "text": "en", + "start": 96.36, + "end": 96.58, + "confidence": 0.919 + }, + { + "text": "vrai,", + "start": 96.58, + "end": 96.92, + "confidence": 0.993 + }, + { + "text": "il", + "start": 97.14, + "end": 97.18, + "confidence": 0.987 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.56, + "confidence": 0.933 + }, + { + "text": "remonter", + "start": 97.56, + "end": 98.08, + 
"confidence": 0.61 + }, + { + "text": "très,", + "start": 98.08, + "end": 98.58, + "confidence": 0.975 + }, + { + "text": "très", + "start": 98.8, + "end": 98.86, + "confidence": 0.986 + }, + { + "text": "filmant", + "start": 98.86, + "end": 99.42, + "confidence": 0.409 + }, + { + "text": "tout", + "start": 99.42, + "end": 99.68, + "confidence": 0.346 + }, + { + "text": "l'histoire", + "start": 99.68, + "end": 100.08, + "confidence": 0.876 + }, + { + "text": "des", + "start": 100.08, + "end": 100.22, + "confidence": 0.957 + }, + { + "text": "objectes", + "start": 100.22, + "end": 100.48, + "confidence": 0.844 + } + ] + }, + { + "id": 28, + "seek": 8210, + "start": 100.48, + "end": 105.24, + "text": " techniques et de leur infertion dans nos vieux pour déterminer si ce rapport est totalement", + "tokens": [ + 51286, + 7512, + 1030, + 368, + 9580, + 1536, + 911, + 313, + 2680, + 3269, + 4941, + 2449, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 51530 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.614, + "words": [ + { + "text": "techniques", + "start": 100.48, + "end": 101.0, + "confidence": 0.951 + }, + { + "text": "et", + "start": 101.0, + "end": 101.58, + "confidence": 0.944 + }, + { + "text": "de", + "start": 101.58, + "end": 101.72, + "confidence": 0.975 + }, + { + "text": "leur", + "start": 101.72, + "end": 101.8, + "confidence": 0.806 + }, + { + "text": "infertion", + "start": 101.8, + "end": 102.34, + "confidence": 0.383 + }, + { + "text": "dans", + "start": 102.34, + "end": 102.5, + "confidence": 0.298 + }, + { + "text": "nos", + "start": 102.5, + "end": 102.68, + "confidence": 0.422 + }, + { + "text": "vieux", + "start": 102.68, + "end": 103.08, + "confidence": 0.388 + }, + { + "text": "pour", + "start": 103.08, + "end": 103.1, + "confidence": 0.275 + }, + { + "text": "déterminer", + "start": 103.1, + "end": 
103.64, + "confidence": 0.976 + }, + { + "text": "si", + "start": 103.64, + "end": 103.8, + "confidence": 0.367 + }, + { + "text": "ce", + "start": 103.8, + "end": 103.92, + "confidence": 0.983 + }, + { + "text": "rapport", + "start": 103.92, + "end": 104.22, + "confidence": 0.998 + }, + { + "text": "est", + "start": 104.22, + "end": 104.88, + "confidence": 0.942 + }, + { + "text": "totalement", + "start": 104.88, + "end": 105.24, + "confidence": 0.908 + } + ] + }, + { + "id": 29, + "seek": 8210, + "start": 105.24, + "end": 106.1, + "text": " inédit.", + "tokens": [ + 51530, + 294, + 7811, + 270, + 13, + 51580 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.939, + "words": [ + { + "text": "inédit.", + "start": 105.24, + "end": 106.1, + "confidence": 0.939 + } + ] + }, + { + "id": 30, + "seek": 8210, + "start": 106.16, + "end": 109.44, + "text": " Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment.", + "tokens": [ + 51580, + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13, + 51738 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.811, + "words": [ + { + "text": "Mais", + "start": 106.16, + "end": 106.5, + "confidence": 0.723 + }, + { + "text": "j'ai", + "start": 106.5, + "end": 106.92, + "confidence": 0.91 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.34, + "confidence": 0.956 + }, + { + "text": "comme", + "start": 107.34, + "end": 107.58, + "confidence": 0.7 + }, + { + "text": "ça", + "start": 107.58, + "end": 107.74, + "confidence": 0.953 + }, + { + "text": "que", + "start": 107.74, + "end": 108.06, + "confidence": 0.941 + }, + { + "text": "Nicolas", + "start": 108.06, + "end": 108.46, + "confidence": 0.994 + }, + { + 
"text": "se", + "start": 108.46, + "end": 108.68, + "confidence": 0.714 + }, + { + "text": "trompe", + "start": 108.68, + "end": 109.02, + "confidence": 0.54 + }, + { + "text": "pas", + "start": 109.02, + "end": 109.16, + "confidence": 0.887 + }, + { + "text": "vraiment.", + "start": 109.16, + "end": 109.44, + "confidence": 0.964 + } + ] + }, + { + "id": 31, + "seek": 8210, + "start": 109.92, + "end": 110.86, + "text": " Pour autant, je sache.", + "tokens": [ + 51738, + 8732, + 34081, + 11, + 1506, + 262, + 6000, + 13, + 51808 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.6, + "words": [ + { + "text": "Pour", + "start": 109.92, + "end": 110.1, + "confidence": 0.486 + }, + { + "text": "autant,", + "start": 110.1, + "end": 110.28, + "confidence": 0.746 + }, + { + "text": "je", + "start": 110.42, + "end": 110.48, + "confidence": 0.932 + }, + { + "text": "sache.", + "start": 110.48, + "end": 110.86, + "confidence": 0.479 + } + ] + }, + { + "id": 32, + "seek": 11098, + "start": 111.04, + "end": 115.0, + "text": " Il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 50364, + 4416, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13, + 50568 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.822, + "words": [ + { + "text": "Il", + "start": 111.04, + "end": 111.2, + "confidence": 0.905 + }, + { + "text": "y", + "start": 111.2, + "end": 111.3, + "confidence": 0.934 + }, + { + "text": "a", + "start": 111.3, + "end": 111.56, + "confidence": 0.894 + }, + { + "text": "eu", + "start": 111.56, + "end": 111.6, + "confidence": 0.839 + }, + { + "text": "plein", + "start": 111.6, + "end": 111.88, + "confidence": 0.774 + }, + { + "text": "de", + 
"start": 111.88, + "end": 112.12, + "confidence": 0.944 + }, + { + "text": "discussions", + "start": 112.12, + "end": 112.6, + "confidence": 0.681 + }, + { + "text": "autour", + "start": 112.6, + "end": 113.04, + "confidence": 0.978 + }, + { + "text": "de", + "start": 113.04, + "end": 113.48, + "confidence": 0.974 + }, + { + "text": "la", + "start": 113.48, + "end": 113.56, + "confidence": 0.967 + }, + { + "text": "voiture", + "start": 113.56, + "end": 113.88, + "confidence": 0.983 + }, + { + "text": "ou", + "start": 113.88, + "end": 114.14, + "confidence": 0.661 + }, + { + "text": "même", + "start": 114.14, + "end": 114.34, + "confidence": 0.99 + }, + { + "text": "du", + "start": 114.34, + "end": 114.64, + "confidence": 0.289 + }, + { + "text": "téléphone.", + "start": 114.64, + "end": 115.0, + "confidence": 0.986 + } + ] + }, + { + "id": 33, + "seek": 11098, + "start": 115.52, + "end": 119.51, + "text": " Mais, la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même", + "tokens": [ + 50568, + 6313, + 11, + 635, + 27998, + 1288, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 11, + 5926, + 476, + 319, + 7108, + 297, + 6, + 268, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 50790 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.702, + "words": [ + { + "text": "Mais,", + "start": 115.52, + "end": 115.82, + "confidence": 0.984 + }, + { + "text": "la", + "start": 115.92, + "end": 116.0, + "confidence": 0.961 + }, + { + "text": "dépense", + "start": 116.0, + "end": 116.42, + "confidence": 0.616 + }, + { + "text": "n'était", + "start": 116.42, + "end": 116.82, + "confidence": 0.956 + }, + { + "text": "pas", + "start": 116.82, + "end": 117.04, + "confidence": 0.994 + }, + { + "text": "du", + "start": 117.04, + "end": 117.2, + "confidence": 0.958 + }, + { + "text": "même", + "start": 117.2, + "end": 117.38, + 
"confidence": 0.929 + }, + { + "text": "mort,", + "start": 117.38, + "end": 117.58, + "confidence": 0.685 + }, + { + "text": "donc", + "start": 117.7, + "end": 118.02, + "confidence": 0.931 + }, + { + "text": "le", + "start": 118.02, + "end": 118.34, + "confidence": 0.95 + }, + { + "text": "rejet", + "start": 118.34, + "end": 118.64, + "confidence": 0.57 + }, + { + "text": "n'en", + "start": 118.64, + "end": 118.82, + "confidence": 0.429 + }, + { + "text": "plus", + "start": 118.82, + "end": 118.96, + "confidence": 0.056 + }, + { + "text": "n'était", + "start": 118.96, + "end": 119.16, + "confidence": 0.972 + }, + { + "text": "pas", + "start": 119.16, + "end": 119.32, + "confidence": 0.993 + }, + { + "text": "du", + "start": 119.32, + "end": 119.42, + "confidence": 0.821 + }, + { + "text": "même", + "start": 119.42, + "end": 119.51, + "confidence": 0.789 + } + ] + }, + { + "id": 34, + "seek": 11098, + "start": 119.51, + "end": 119.72, + "text": " mort.", + "tokens": [ + 50790, + 6599, + 13, + 50840 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.644, + "words": [ + { + "text": "mort.", + "start": 119.51, + "end": 119.72, + "confidence": 0.644 + } + ] + }, + { + "id": 35, + "seek": 11098, + "start": 120.04, + "end": 121.24, + "text": " On peut adorer sa bagnure.", + "tokens": [ + 50840, + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 77, + 540, + 13, + 50890 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.614, + "words": [ + { + "text": "On", + "start": 120.04, + "end": 120.22, + "confidence": 0.981 + }, + { + "text": "peut", + "start": 120.22, + "end": 120.36, + "confidence": 0.988 + }, + { + "text": "adorer", + "start": 120.36, + "end": 120.7, + "confidence": 0.858 + }, + { + "text": "sa", + "start": 120.7, + "end": 120.88, + 
"confidence": 0.918 + }, + { + "text": "bagnure.", + "start": 120.88, + "end": 121.24, + "confidence": 0.314 + } + ] + }, + { + "id": 36, + "seek": 11098, + "start": 121.38, + "end": 123.06, + "text": " On a par besoin pour plein de choses.", + "tokens": [ + 50890, + 1282, + 257, + 971, + 19207, + 2016, + 21088, + 368, + 14488, + 13, + 50972 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.721, + "words": [ + { + "text": "On", + "start": 121.38, + "end": 121.56, + "confidence": 0.896 + }, + { + "text": "a", + "start": 121.56, + "end": 121.64, + "confidence": 0.653 + }, + { + "text": "par", + "start": 121.64, + "end": 121.8, + "confidence": 0.161 + }, + { + "text": "besoin", + "start": 121.8, + "end": 122.12, + "confidence": 0.947 + }, + { + "text": "pour", + "start": 122.12, + "end": 122.5, + "confidence": 0.976 + }, + { + "text": "plein", + "start": 122.5, + "end": 122.72, + "confidence": 0.858 + }, + { + "text": "de", + "start": 122.72, + "end": 122.8, + "confidence": 0.993 + }, + { + "text": "choses.", + "start": 122.8, + "end": 123.06, + "confidence": 0.988 + } + ] + }, + { + "id": 37, + "seek": 11098, + "start": 123.36, + "end": 126.46, + "text": " Et là, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 50972, + 3790, + 3684, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13, + 51142 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.835, + "words": [ + { + "text": "Et", + "start": 123.36, + "end": 123.5, + "confidence": 0.606 + }, + { + "text": "là,", + "start": 123.5, + "end": 123.76, + "confidence": 0.499 + }, + { + "text": "le", + "start": 123.82, + "end": 124.06, + "confidence": 0.986 + }, + { + "text": "soir,", + "start": 124.06, + "end": 124.42, + 
"confidence": 0.971 + }, + { + "text": "quand", + "start": 124.84, + "end": 124.96, + "confidence": 0.79 + }, + { + "text": "on", + "start": 124.96, + "end": 125.06, + "confidence": 0.988 + }, + { + "text": "va", + "start": 125.06, + "end": 125.18, + "confidence": 0.967 + }, + { + "text": "se", + "start": 125.18, + "end": 125.26, + "confidence": 0.868 + }, + { + "text": "coucher,", + "start": 125.26, + "end": 125.6, + "confidence": 0.803 + }, + { + "text": "on", + "start": 125.82, + "end": 126.06, + "confidence": 0.98 + }, + { + "text": "la", + "start": 126.06, + "end": 126.2, + "confidence": 0.81 + }, + { + "text": "laisse.", + "start": 126.2, + "end": 126.46, + "confidence": 0.984 + } + ] + }, + { + "id": 38, + "seek": 11098, + "start": 127.06, + "end": 130.1, + "text": " On la pade en la main quand on est colis, qu'on n'a même pas au chiot.", + "tokens": [ + 51142, + 1282, + 635, + 280, + 762, + 465, + 635, + 2135, + 6932, + 322, + 871, + 1173, + 271, + 11, + 421, + 6, + 266, + 297, + 6, + 64, + 5698, + 1736, + 1609, + 417, + 6471, + 13, + 51334 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.543, + "words": [ + { + "text": "On", + "start": 127.06, + "end": 127.32, + "confidence": 0.606 + }, + { + "text": "la", + "start": 127.32, + "end": 127.5, + "confidence": 0.349 + }, + { + "text": "pade", + "start": 127.5, + "end": 127.72, + "confidence": 0.181 + }, + { + "text": "en", + "start": 127.72, + "end": 127.88, + "confidence": 0.774 + }, + { + "text": "la", + "start": 127.88, + "end": 128.04, + "confidence": 0.796 + }, + { + "text": "main", + "start": 128.04, + "end": 128.3, + "confidence": 0.971 + }, + { + "text": "quand", + "start": 128.3, + "end": 128.46, + "confidence": 0.483 + }, + { + "text": "on", + "start": 128.46, + "end": 128.62, + "confidence": 0.994 + }, + { + "text": "est", + "start": 128.62, + "end": 128.74, + "confidence": 0.708 + 
}, + { + "text": "colis,", + "start": 128.74, + "end": 129.12, + "confidence": 0.503 + }, + { + "text": "qu'on", + "start": 129.22, + "end": 129.32, + "confidence": 0.776 + }, + { + "text": "n'a", + "start": 129.32, + "end": 129.42, + "confidence": 0.441 + }, + { + "text": "même", + "start": 129.42, + "end": 129.56, + "confidence": 0.333 + }, + { + "text": "pas", + "start": 129.56, + "end": 129.74, + "confidence": 1.0 + }, + { + "text": "au", + "start": 129.74, + "end": 129.88, + "confidence": 0.638 + }, + { + "text": "chiot.", + "start": 129.88, + "end": 130.1, + "confidence": 0.514 + } + ] + }, + { + "id": 39, + "seek": 11098, + "start": 130.94, + "end": 135.32, + "text": " On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure", + "tokens": [ + 51334, + 1282, + 45913, + 7418, + 1136, + 936, + 15797, + 971, + 1872, + 275, + 423, + 1956, + 2678, + 84, + 494, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251, + 30027, + 51574 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.655, + "words": [ + { + "text": "On", + "start": 130.94, + "end": 131.04, + "confidence": 0.99 + }, + { + "text": "pouvait", + "start": 131.04, + "end": 131.28, + "confidence": 0.588 + }, + { + "text": "être", + "start": 131.28, + "end": 131.6, + "confidence": 0.443 + }, + { + "text": "émervé", + "start": 131.6, + "end": 132.24, + "confidence": 0.725 + }, + { + "text": "par", + "start": 132.24, + "end": 132.5, + "confidence": 0.85 + }, + { + "text": "son", + "start": 132.5, + "end": 132.7, + "confidence": 0.443 + }, + { + "text": "mome", + "start": 132.7, + "end": 133.04, + "confidence": 0.255 + }, + { + "text": "qui", + "start": 133.04, + "end": 133.3, + "confidence": 0.911 + }, + { + "text": "occupeait", + "start": 133.3, + "end": 133.74, + "confidence": 0.638 + }, + { + "text": "la", + "start": 133.74, + "end": 133.86, + "confidence": 
0.807 + }, + { + "text": "ligne", + "start": 133.86, + "end": 134.02, + "confidence": 0.974 + }, + { + "text": "de", + "start": 134.02, + "end": 134.24, + "confidence": 0.929 + }, + { + "text": "téléphone", + "start": 134.24, + "end": 134.56, + "confidence": 0.978 + }, + { + "text": "pendant", + "start": 134.56, + "end": 134.92, + "confidence": 0.899 + }, + { + "text": "une", + "start": 134.92, + "end": 135.18, + "confidence": 0.814 + }, + { + "text": "heure", + "start": 135.18, + "end": 135.32, + "confidence": 0.482 + } + ] + }, + { + "id": 40, + "seek": 11098, + "start": 135.32, + "end": 137.04, + "text": " chaque soir pour discuter avec un copain.", + "tokens": [ + 51574, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.955, + "words": [ + { + "text": "chaque", + "start": 135.32, + "end": 135.6, + "confidence": 0.99 + }, + { + "text": "soir", + "start": 135.6, + "end": 135.8, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 135.8, + "end": 136.0, + "confidence": 0.981 + }, + { + "text": "discuter", + "start": 136.0, + "end": 136.34, + "confidence": 0.882 + }, + { + "text": "avec", + "start": 136.34, + "end": 136.5, + "confidence": 0.993 + }, + { + "text": "un", + "start": 136.5, + "end": 136.62, + "confidence": 0.967 + }, + { + "text": "copain.", + "start": 136.62, + "end": 137.04, + "confidence": 0.954 + } + ] + }, + { + "id": 41, + "seek": 13698, + "start": 137.32, + "end": 141.84, + "text": " Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui.", + "tokens": [ + 50376, + 6313, + 2788, + 408, + 725, + 37227, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 5698, + 275, + 423, + 14023, + 6, + 10556, + 13, + 50606 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, 
+ "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.735, + "words": [ + { + "text": "Mais", + "start": 137.32, + "end": 137.52, + "confidence": 0.984 + }, + { + "text": "ça", + "start": 137.52, + "end": 137.7, + "confidence": 0.877 + }, + { + "text": "ne", + "start": 137.7, + "end": 137.8, + "confidence": 0.671 + }, + { + "text": "ressemble", + "start": 137.8, + "end": 138.22, + "confidence": 0.731 + }, + { + "text": "pas", + "start": 138.22, + "end": 138.76, + "confidence": 0.532 + }, + { + "text": "à", + "start": 138.76, + "end": 138.94, + "confidence": 0.98 + }, + { + "text": "ce", + "start": 138.94, + "end": 139.08, + "confidence": 0.574 + }, + { + "text": "qu'on", + "start": 139.08, + "end": 139.22, + "confidence": 0.96 + }, + { + "text": "peut", + "start": 139.22, + "end": 139.38, + "confidence": 0.548 + }, + { + "text": "ressentir", + "start": 139.38, + "end": 140.06, + "confidence": 0.888 + }, + { + "text": "à", + "start": 140.06, + "end": 140.26, + "confidence": 0.434 + }, + { + "text": "voir", + "start": 140.26, + "end": 140.54, + "confidence": 0.951 + }, + { + "text": "même", + "start": 140.54, + "end": 140.96, + "confidence": 0.297 + }, + { + "text": "mome", + "start": 140.96, + "end": 141.22, + "confidence": 0.495 + }, + { + "text": "aujourd'hui.", + "start": 141.22, + "end": 141.84, + "confidence": 0.961 + } + ] + }, + { + "id": 42, + "seek": 13698, + "start": 141.96, + "end": 145.86, + "text": " Continuellement avec son smartphone dans la main, comme c'était une sorte de estimateur", + "tokens": [ + 50606, + 14674, + 31816, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 8017, + 15540, + 50810 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.663, + "words": [ + { + "text": "Continuellement", + "start": 141.96, + "end": 142.84, 
+ "confidence": 0.843 + }, + { + "text": "avec", + "start": 142.84, + "end": 143.24, + "confidence": 0.642 + }, + { + "text": "son", + "start": 143.24, + "end": 143.42, + "confidence": 0.865 + }, + { + "text": "smartphone", + "start": 143.42, + "end": 143.72, + "confidence": 0.447 + }, + { + "text": "dans", + "start": 143.72, + "end": 143.92, + "confidence": 0.59 + }, + { + "text": "la", + "start": 143.92, + "end": 144.0, + "confidence": 0.974 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.3, + "confidence": 0.995 + }, + { + "text": "comme", + "start": 144.42, + "end": 144.56, + "confidence": 0.895 + }, + { + "text": "c'était", + "start": 144.56, + "end": 144.84, + "confidence": 0.595 + }, + { + "text": "une", + "start": 144.84, + "end": 144.96, + "confidence": 0.973 + }, + { + "text": "sorte", + "start": 144.96, + "end": 145.1, + "confidence": 0.655 + }, + { + "text": "de", + "start": 145.1, + "end": 145.4, + "confidence": 0.314 + }, + { + "text": "estimateur", + "start": 145.4, + "end": 145.86, + "confidence": 0.489 + } + ] + }, + { + "id": 43, + "seek": 13698, + "start": 145.96, + "end": 149.0, + "text": " extère de tomber de lâcher à l'éantrénée, ça m'a eu immédiate.", + "tokens": [ + 50810, + 1279, + 4212, + 368, + 2916, + 607, + 368, + 48835, + 6759, + 1531, + 287, + 6, + 526, + 394, + 81, + 3516, + 3856, + 11, + 2788, + 275, + 6, + 64, + 2228, + 3397, + 526, + 4504, + 473, + 13, + 50954 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.415, + "words": [ + { + "text": "extère", + "start": 145.96, + "end": 146.4, + "confidence": 0.373 + }, + { + "text": "de", + "start": 146.4, + "end": 146.54, + "confidence": 0.254 + }, + { + "text": "tomber", + "start": 146.54, + "end": 146.7, + "confidence": 0.102 + }, + { + "text": "de", + "start": 146.7, + "end": 146.9, + "confidence": 0.455 + }, + { + "text": "lâcher", + "start": 146.9, + "end": 
147.34, + "confidence": 0.751 + }, + { + "text": "à", + "start": 147.34, + "end": 147.46, + "confidence": 0.444 + }, + { + "text": "l'éantrénée,", + "start": 147.46, + "end": 147.9, + "confidence": 0.491 + }, + { + "text": "ça", + "start": 148.0, + "end": 148.02, + "confidence": 0.798 + }, + { + "text": "m'a", + "start": 148.02, + "end": 148.26, + "confidence": 0.529 + }, + { + "text": "eu", + "start": 148.26, + "end": 148.44, + "confidence": 0.136 + }, + { + "text": "immédiate.", + "start": 148.44, + "end": 149.0, + "confidence": 0.502 + } + ] + }, + { + "id": 44, + "seek": 13698, + "start": 149.08, + "end": 151.98, + "text": " Bon, je dis ça pour le mome, mais évidemment, va là, bon aussi.", + "tokens": [ + 50954, + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 423, + 11, + 2420, + 24724, + 11, + 2773, + 3684, + 11, + 4428, + 6212, + 13, + 51126 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.564, + "words": [ + { + "text": "Bon,", + "start": 149.08, + "end": 149.28, + "confidence": 0.375 + }, + { + "text": "je", + "start": 149.32, + "end": 149.4, + "confidence": 0.468 + }, + { + "text": "dis", + "start": 149.4, + "end": 149.46, + "confidence": 0.321 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.952 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.8, + "confidence": 0.964 + }, + { + "text": "le", + "start": 149.8, + "end": 149.92, + "confidence": 0.992 + }, + { + "text": "mome,", + "start": 149.92, + "end": 150.12, + "confidence": 0.621 + }, + { + "text": "mais", + "start": 150.46, + "end": 150.52, + "confidence": 0.692 + }, + { + "text": "évidemment,", + "start": 150.52, + "end": 151.12, + "confidence": 0.781 + }, + { + "text": "va", + "start": 151.26, + "end": 151.36, + "confidence": 0.39 + }, + { + "text": "là,", + "start": 151.36, + "end": 151.46, + "confidence": 0.591 + }, + { + "text": 
"bon", + "start": 151.6, + "end": 151.68, + "confidence": 0.827 + }, + { + "text": "aussi.", + "start": 151.68, + "end": 151.98, + "confidence": 0.163 + } + ] + }, + { + "id": 45, + "seek": 13698, + "start": 152.64, + "end": 154.64, + "text": " Donc, rapport immédiate d'accord.", + "tokens": [ + 51126, + 7477, + 11, + 18018, + 3397, + 526, + 4504, + 473, + 274, + 6, + 19947, + 13, + 51248 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.86, + "words": [ + { + "text": "Donc,", + "start": 152.64, + "end": 153.08, + "confidence": 0.982 + }, + { + "text": "rapport", + "start": 153.16, + "end": 153.58, + "confidence": 0.979 + }, + { + "text": "immédiate", + "start": 153.58, + "end": 154.28, + "confidence": 0.839 + }, + { + "text": "d'accord.", + "start": 154.28, + "end": 154.64, + "confidence": 0.815 + } + ] + }, + { + "id": 46, + "seek": 13698, + "start": 155.66, + "end": 158.36, + "text": " Mais pourquoi, à ton impression qu'on en sortira, j'amé?", + "tokens": [ + 51248, + 6313, + 19934, + 11, + 1531, + 2952, + 9995, + 421, + 6, + 266, + 465, + 26906, + 64, + 11, + 361, + 6, + 335, + 526, + 30, + 51440 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.709, + "words": [ + { + "text": "Mais", + "start": 155.66, + "end": 155.88, + "confidence": 0.984 + }, + { + "text": "pourquoi,", + "start": 155.88, + "end": 156.28, + "confidence": 0.898 + }, + { + "text": "à", + "start": 156.38, + "end": 156.58, + "confidence": 0.838 + }, + { + "text": "ton", + "start": 156.58, + "end": 156.7, + "confidence": 0.952 + }, + { + "text": "impression", + "start": 156.7, + "end": 157.06, + "confidence": 0.939 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.28, + "confidence": 0.889 + }, + { + "text": "en", + "start": 157.28, + "end": 157.4, + 
"confidence": 0.801 + }, + { + "text": "sortira,", + "start": 157.4, + "end": 157.88, + "confidence": 0.577 + }, + { + "text": "j'amé?", + "start": 157.96, + "end": 158.36, + "confidence": 0.463 + } + ] + }, + { + "id": 47, + "seek": 13698, + "start": 159.16, + "end": 163.32, + "text": " Et puis, il faut en remettre la faute sur les gens qui ont créé cette critique merveilleux", + "tokens": [ + 51440, + 3790, + 9093, + 11, + 1930, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 5550, + 25673, + 3551, + 303, + 3409, + 2449, + 51678 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.715, + "words": [ + { + "text": "Et", + "start": 159.16, + "end": 159.32, + "confidence": 0.83 + }, + { + "text": "puis,", + "start": 159.32, + "end": 159.46, + "confidence": 0.495 + }, + { + "text": "il", + "start": 159.62, + "end": 159.64, + "confidence": 0.594 + }, + { + "text": "faut", + "start": 159.64, + "end": 159.66, + "confidence": 0.951 + }, + { + "text": "en", + "start": 159.66, + "end": 159.78, + "confidence": 0.937 + }, + { + "text": "remettre", + "start": 159.78, + "end": 160.14, + "confidence": 0.984 + }, + { + "text": "la", + "start": 160.14, + "end": 160.42, + "confidence": 0.611 + }, + { + "text": "faute", + "start": 160.42, + "end": 160.68, + "confidence": 0.591 + }, + { + "text": "sur", + "start": 160.68, + "end": 160.98, + "confidence": 0.966 + }, + { + "text": "les", + "start": 160.98, + "end": 161.24, + "confidence": 0.83 + }, + { + "text": "gens", + "start": 161.24, + "end": 161.46, + "confidence": 0.987 + }, + { + "text": "qui", + "start": 161.46, + "end": 161.6, + "confidence": 0.98 + }, + { + "text": "ont", + "start": 161.6, + "end": 161.68, + "confidence": 0.951 + }, + { + "text": "créé", + "start": 161.68, + "end": 162.3, + "confidence": 0.95 + }, + { + "text": "cette", + "start": 
162.3, + "end": 162.48, + "confidence": 0.909 + }, + { + "text": "critique", + "start": 162.48, + "end": 162.72, + "confidence": 0.033 + }, + { + "text": "merveilleux", + "start": 162.72, + "end": 163.32, + "confidence": 0.841 + } + ] + }, + { + "id": 48, + "seek": 13698, + "start": 163.32, + "end": 165.34, + "text": " et diabolique et diabolique par que merveilleux.", + "tokens": [ + 51678, + 1030, + 1026, + 14923, + 1925, + 1030, + 1026, + 14923, + 1925, + 971, + 631, + 3551, + 303, + 3409, + 2449, + 13, + 51778 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.709, + "words": [ + { + "text": "et", + "start": 163.32, + "end": 163.44, + "confidence": 0.946 + }, + { + "text": "diabolique", + "start": 163.44, + "end": 163.86, + "confidence": 0.574 + }, + { + "text": "et", + "start": 163.86, + "end": 163.94, + "confidence": 0.239 + }, + { + "text": "diabolique", + "start": 163.94, + "end": 164.38, + "confidence": 0.893 + }, + { + "text": "par", + "start": 164.38, + "end": 164.62, + "confidence": 0.548 + }, + { + "text": "que", + "start": 164.62, + "end": 164.82, + "confidence": 0.529 + }, + { + "text": "merveilleux.", + "start": 164.82, + "end": 165.34, + "confidence": 0.981 + } + ] + }, + { + "id": 49, + "seek": 16526, + "start": 166.9, + "end": 168.8, + "text": " Les économistes parlent de dépendance du santé.", + "tokens": [ + 50410, + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 30068, + 13, + 50542 + ], + "temperature": 0.0, + "avg_logprob": -0.6641062498092651, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.18914750218391418, + "confidence": 0.765, + "words": [ + { + "text": "Les", + "start": 166.9, + "end": 167.06, + "confidence": 0.699 + }, + { + "text": "économistes", + "start": 167.06, + "end": 167.52, + "confidence": 0.916 + }, + { + "text": "parlent", + "start": 167.52, + "end": 167.78, + 
"confidence": 0.794 + }, + { + "text": "de", + "start": 167.78, + "end": 167.84, + "confidence": 0.813 + }, + { + "text": "dépendance", + "start": 167.84, + "end": 168.36, + "confidence": 0.699 + }, + { + "text": "du", + "start": 168.36, + "end": 168.5, + "confidence": 0.963 + }, + { + "text": "santé.", + "start": 168.5, + "end": 168.8, + "confidence": 0.487 + } + ] + }, + { + "id": 50, + "seek": 16526, + "start": 168.82, + "end": 173.42, + "text": " Ces vidéos, en fait, on est un santé qui a été étabis, un soit mon termine, en marchand dessus,", + "tokens": [ + 50542, + 28414, + 25417, + 11, + 465, + 3887, + 11, + 322, + 871, + 517, + 30068, + 1956, + 257, + 8862, + 4823, + 455, + 271, + 11, + 517, + 12703, + 1108, + 1433, + 533, + 11, + 465, + 8368, + 474, + 30677, + 11, + 50784 + ], + "temperature": 0.0, + "avg_logprob": -0.6641062498092651, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.18914750218391418, + "confidence": 0.474, + "words": [ + { + "text": "Ces", + "start": 168.82, + "end": 169.14, + "confidence": 0.439 + }, + { + "text": "vidéos,", + "start": 169.14, + "end": 169.4, + "confidence": 0.459 + }, + { + "text": "en", + "start": 169.44, + "end": 169.64, + "confidence": 0.747 + }, + { + "text": "fait,", + "start": 169.64, + "end": 169.66, + "confidence": 0.976 + }, + { + "text": "on", + "start": 169.72, + "end": 169.74, + "confidence": 0.302 + }, + { + "text": "est", + "start": 169.74, + "end": 169.9, + "confidence": 0.815 + }, + { + "text": "un", + "start": 169.9, + "end": 170.48, + "confidence": 0.364 + }, + { + "text": "santé", + "start": 170.48, + "end": 170.8, + "confidence": 0.951 + }, + { + "text": "qui", + "start": 170.8, + "end": 170.92, + "confidence": 0.836 + }, + { + "text": "a", + "start": 170.92, + "end": 171.02, + "confidence": 0.821 + }, + { + "text": "été", + "start": 171.02, + "end": 171.12, + "confidence": 0.994 + }, + { + "text": "étabis,", + "start": 171.12, + "end": 171.5, + "confidence": 0.342 + }, + { + "text": 
"un", + "start": 171.76, + "end": 171.9, + "confidence": 0.233 + }, + { + "text": "soit", + "start": 171.9, + "end": 172.16, + "confidence": 0.311 + }, + { + "text": "mon", + "start": 172.16, + "end": 172.36, + "confidence": 0.205 + }, + { + "text": "termine,", + "start": 172.36, + "end": 172.72, + "confidence": 0.4 + }, + { + "text": "en", + "start": 172.8, + "end": 172.82, + "confidence": 0.31 + }, + { + "text": "marchand", + "start": 172.82, + "end": 173.14, + "confidence": 0.816 + }, + { + "text": "dessus,", + "start": 173.14, + "end": 173.42, + "confidence": 0.205 + } + ] + }, + { + "id": 51, + "seek": 16526, + "start": 173.86, + "end": 177.42, + "text": " soit des finissants débordes, des finissants, une signalétique.", + "tokens": [ + 50784, + 12703, + 730, + 962, + 891, + 1719, + 36529, + 765, + 279, + 11, + 730, + 962, + 891, + 1719, + 11, + 2251, + 6358, + 42379, + 13, + 50974 + ], + "temperature": 0.0, + "avg_logprob": -0.6641062498092651, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.18914750218391418, + "confidence": 0.582, + "words": [ + { + "text": "soit", + "start": 173.86, + "end": 174.4, + "confidence": 0.993 + }, + { + "text": "des", + "start": 174.4, + "end": 175.2, + "confidence": 0.762 + }, + { + "text": "finissants", + "start": 175.2, + "end": 175.58, + "confidence": 0.436 + }, + { + "text": "débordes,", + "start": 175.58, + "end": 175.98, + "confidence": 0.467 + }, + { + "text": "des", + "start": 176.14, + "end": 176.18, + "confidence": 0.22 + }, + { + "text": "finissants,", + "start": 176.18, + "end": 176.62, + "confidence": 0.961 + }, + { + "text": "une", + "start": 176.66, + "end": 176.84, + "confidence": 0.748 + }, + { + "text": "signalétique.", + "start": 176.84, + "end": 177.42, + "confidence": 0.567 + } + ] + } + ], + "language": "fr", + "language_probs": { + "en": 0.0030481417197734118, + "zh": 0.0006290195160545409, + "de": 0.0005133908125571907, + "es": 0.00025814908440224826, + "ru": 9.496775601292029e-05, + 
"ko": 0.00039982915041036904, + "fr": 0.9880982041358948, + "ja": 0.001462514279410243, + "pt": 0.0008076771046034992, + "tr": 0.00010511971777305007, + "pl": 5.1633072871482e-05, + "ca": 5.033106390328612e-06, + "nl": 0.00017881467647384852, + "ar": 0.00031138729536905885, + "sv": 5.896681614103727e-05, + "it": 0.0004061255604028702, + "id": 5.233602678345051e-06, + "hi": 1.8700258806347847e-05, + "fi": 9.383333008372574e-07, + "vi": 1.4678001207357738e-05, + "he": 2.255681829410605e-05, + "uk": 6.758561426067899e-07, + "el": 1.2360116670606658e-05, + "ms": 3.576518429326825e-05, + "cs": 6.215056600922253e-06, + "ro": 1.216849068441661e-05, + "da": 7.153486421884736e-06, + "hu": 9.626068276702426e-06, + "ta": 6.973100994400738e-07, + "no": 1.3574932381743565e-05, + "th": 2.006244903896004e-05, + "ur": 5.033106390328612e-06, + "hr": 9.290757674307315e-08, + "bg": 2.3589695956616197e-06, + "lt": 3.2486479284443703e-08, + "la": 0.00038151966873556376, + "mi": 5.399735073297052e-06, + "ml": 1.4994523098721402e-06, + "cy": 3.837044278043322e-05, + "sk": 3.3457303061368293e-07, + "te": 1.4166712958285643e-07, + "fa": 3.1251270229404327e-06, + "lv": 2.4426476841199474e-08, + "bn": 2.181690206271014e-06, + "sr": 3.210799448538637e-08, + "az": 4.86800843191304e-07, + "sl": 6.299671895249048e-07, + "kn": 3.148696947619101e-08, + "et": 7.642368160531987e-08, + "mk": 1.3308395807598572e-07, + "br": 0.0019074783194810152, + "eu": 1.2653227713599335e-05, + "is": 2.752122725269146e-07, + "hy": 1.2528391835076036e-06, + "ne": 1.2600135335105733e-07, + "mn": 1.95565439753409e-06, + "bs": 8.0719495088033e-08, + "kk": 1.8621793174133927e-07, + "sq": 1.2926086583320284e-06, + "sw": 2.25094436245854e-06, + "gl": 1.216849068441661e-05, + "mr": 7.854222161540747e-08, + "pa": 6.153739491310262e-07, + "si": 2.8233168904989725e-06, + "km": 2.4773818950052373e-05, + "sn": 2.2733735022484325e-05, + "yo": 2.345538086956367e-05, + "so": 8.393499939529647e-08, + "af": 5.141641850059386e-07, + 
"oc": 0.00014594428648706526, + "ka": 2.2462170079506905e-07, + "be": 1.1407231568227871e-06, + "tg": 8.181845423393952e-09, + "sd": 1.3652669395014527e-06, + "gu": 5.95188289764792e-08, + "am": 1.9212912150123884e-07, + "yi": 4.511647148319753e-06, + "lo": 6.918835993019457e-07, + "uz": 1.9969322029300685e-10, + "fo": 4.0127424654201604e-06, + "ht": 0.00013288376794662327, + "ps": 2.7954624215453805e-07, + "tk": 5.733347130387756e-10, + "nn": 0.0004674476513173431, + "mt": 3.094294811489817e-07, + "sa": 2.4338512503163656e-06, + "lb": 4.8388461237891534e-08, + "my": 6.3624424910813104e-06, + "bo": 3.0289770620584022e-06, + "tl": 8.216320566134527e-07, + "mg": 1.7016615894149822e-09, + "as": 2.2815898148564884e-07, + "tt": 2.334998638886532e-09, + "haw": 5.896681614103727e-05, + "ln": 3.059158188989386e-05, + "ha": 5.262001057104726e-09, + "ba": 5.179647821762501e-10, + "jw": 7.281429861905053e-05, + "su": 7.760400655421051e-10 + } +} \ No newline at end of file diff --git a/tests/expected/tiny_fr.cpu/bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny_fr.cpu/bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..adc9890af4a61ebbb4b7beeef62b7b6307ee0338 --- /dev/null +++ b/tests/expected/tiny_fr.cpu/bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,190 @@ +{ + "text": " Bonjour! Est-ce que vous allez bien? Bonjour! Bonjour! 
Est-ce que vous allez bien?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.46, + "end": 0.68, + "text": " Bonjour!", + "tokens": [ + 25431, + 2298 + ], + "temperature": 0.0, + "avg_logprob": -0.7739177703857422, + "compression_ratio": 0.8181818181818182, + "no_speech_prob": 0.04250079020857811, + "confidence": 0.69, + "words": [ + { + "text": "Bonjour!", + "start": 0.46, + "end": 0.68, + "confidence": 0.69 + } + ] + }, + { + "id": 1, + "seek": 148, + "start": 1.86, + "end": 2.8, + "text": " Est-ce que vous allez bien?", + "tokens": [ + 50364, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.4815776131369851, + "compression_ratio": 0.7714285714285715, + "no_speech_prob": 0.04176269844174385, + "confidence": 0.632, + "words": [ + { + "text": "Est-ce", + "start": 1.86, + "end": 2.14, + "confidence": 0.548 + }, + { + "text": "que", + "start": 2.14, + "end": 2.18, + "confidence": 0.848 + }, + { + "text": "vous", + "start": 2.18, + "end": 2.34, + "confidence": 0.993 + }, + { + "text": "allez", + "start": 2.34, + "end": 2.48, + "confidence": 0.298 + }, + { + "text": "bien?", + "start": 2.48, + "end": 2.8, + "confidence": 0.979 + } + ] + }, + { + "id": 2, + "seek": 3148, + "start": 32.98, + "end": 33.16, + "text": " Bonjour!", + "tokens": [ + 25431, + 2298 + ], + "temperature": 0.0, + "avg_logprob": -0.34033950169881183, + "compression_ratio": 0.8181818181818182, + "no_speech_prob": 0.5495142936706543, + "confidence": 0.919, + "words": [ + { + "text": "Bonjour!", + "start": 32.98, + "end": 33.16, + "confidence": 0.919 + } + ] + }, + { + "id": 3, + "seek": 3348, + "start": 34.4, + "end": 34.61, + "text": " Bonjour!", + "tokens": [ + 25431, + 2298 + ], + "temperature": 0.0, + "avg_logprob": -0.23644089698791504, + "compression_ratio": 0.8181818181818182, + "no_speech_prob": 0.40533149242401123, + "confidence": 0.331, + "words": [ + { + "text": "Bonjour!", + "start": 34.4, + "end": 34.61, + 
"confidence": 0.331 + } + ] + }, + { + "id": 4, + "seek": 3448, + "start": 34.61, + "end": 35.34, + "text": " Est-ce que vous allez bien?", + "tokens": [ + 50364, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.17279924045909534, + "compression_ratio": 0.7714285714285715, + "no_speech_prob": 0.5867945551872253, + "confidence": 0.871, + "words": [ + { + "text": "Est-ce", + "start": 34.61, + "end": 34.66, + "confidence": 0.741 + }, + { + "text": "que", + "start": 34.66, + "end": 34.72, + "confidence": 0.979 + }, + { + "text": "vous", + "start": 34.72, + "end": 34.86, + "confidence": 0.999 + }, + { + "text": "allez", + "start": 34.86, + "end": 35.0, + "confidence": 0.959 + }, + { + "text": "bien?", + "start": 35.0, + "end": 35.34, + "confidence": 1.0 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr.cpu/radio_short.mp3.words.json b/tests/expected/tiny_fr.cpu/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b1910cc5b3224406e946a7c50d9507d3dfe850fe --- /dev/null +++ b/tests/expected/tiny_fr.cpu/radio_short.mp3.words.json @@ -0,0 +1,104 @@ +{ + "text": "............", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.08, + "end": 0.76, + "text": "...", + "tokens": [ + 50364, + 1097, + 50614 + ], + "temperature": 0.0, + "avg_logprob": -1.4265364408493042, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.224, + "words": [ + { + "text": "...", + "start": 0.08, + "end": 0.76, + "confidence": 0.224 + } + ] + }, + { + "id": 1, + "seek": 6000, + "start": 60.02, + "end": 69.4, + "text": "...", + "tokens": [ + 50364, + 1097, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.973953366279602, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.8780310153961182, + "confidence": 0.466, + "words": [ + { + "text": "...", + "start": 60.02, + 
"end": 69.4, + "confidence": 0.466 + } + ] + }, + { + "id": 2, + "seek": 9000, + "start": 90.02, + "end": 90.24, + "text": "...", + "tokens": [ + 1097 + ], + "temperature": 0.0, + "avg_logprob": -0.8283956527709961, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.641629695892334, + "confidence": 0.842, + "words": [ + { + "text": "...", + "start": 90.02, + "end": 90.24, + "confidence": 0.842 + } + ] + }, + { + "id": 3, + "seek": 10500, + "start": 105.04, + "end": 117.76, + "text": "...", + "tokens": [ + 50364, + 1097, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.728730320930481, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.25682491064071655, + "confidence": 0.764, + "words": [ + { + "text": "...", + "start": 105.04, + "end": 117.76, + "confidence": 0.764 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr.cpu/smartphone.mp3.words.json b/tests/expected/tiny_fr.cpu/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f90643caf0519b49ba0cbc0381cd8d8ced1f218b --- /dev/null +++ b/tests/expected/tiny_fr.cpu/smartphone.mp3.words.json @@ -0,0 +1,5038 @@ +{ + "text": " C'est évidence que dit Nicolas. Mais je me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière dans quelques interagues entraîne. Et il est d'ailleurs, c'est la photo c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces les grand-attêtes qu'il a été beaucoup très souvent ementionné. Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs nous ont appris à piquer sur des icônes. C'est ce que le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible. Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté tout flu de la navigation web pour aller directement en but. 
Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas, dit qu'il est très fondablement inédit dans l'histoire de l'humanité. Mais ça s'assoulait d'une autre interrogation. Est-ce que le fait que cette objet soit inédit un d'huies que notre rapport a lui est aussi un rapport inédit? Je veux dire, est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone? Il n'y a pas d'équivalent. On s'est espécie de nous voter dans la relation à l'objet. C'est facilement éterréciant parce qu'on a impression de, comme le 10, les utilisateurs et les efforts, elles aident dépendant de cette objet d'un lieu, en fait, une espèce de relation de médiation avec le monde qui rendent un peu avec la même sédiforme de le jeu. Donc, à objets inédits, rapport inédits. Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépenses et de rojets. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objectes techniques et de leur infération dans le vie pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment. Pour autant, je sache. Il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même mort. On peut adorer sa bagnure, en avoir besoin pour plein de choses. Et là, le soir, quand on va se coucher, on la laisse. On l'a pas dans la main, quand on est collis, quand on n'en mène pas au chiot. On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui continuuellement avec son smartphone dans la main, comme c'était une sorte de estimateur extère de l'intempis de lâcher à l'éantrénée, ça m'a eu immédiate. 
Bon, je dis ça pour le mome, mais évidemment, va là pour nos aussi. Donc, rapport immédiate d'accord. Mais pourquoi, à ton impression qu'on en sortira jamais? Et puis, il faut en remettre la faute sur les gens qui ont créé cette route merveilleux et diabolique, qui a dit à bollique par coeur, merveilleux. Les économistes parlent de dépendance du santé. Ces vidéos, en fait, on est un santé qui a été établie, c'est un soit mon termine, soit définissant des beurs, on définisse un signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 1.38, + "text": " C'est évidence que dit Nicolas.", + "tokens": [ + 383, + 6, + 377, + 20090, + 2778, + 631, + 6176, + 38268, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.625, + "words": [ + { + "text": "C'est", + "start": 0.42, + "end": 0.66, + "confidence": 0.849 + }, + { + "text": "évidence", + "start": 0.66, + "end": 0.94, + "confidence": 0.368 + }, + { + "text": "que", + "start": 0.94, + "end": 1.06, + "confidence": 0.883 + }, + { + "text": "dit", + "start": 1.06, + "end": 1.16, + "confidence": 0.344 + }, + { + "text": "Nicolas.", + "start": 1.16, + "end": 1.38, + "confidence": 0.921 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.66, + "end": 3.62, + "text": " Mais je me l'étais jamais formulé comme ça.", + "tokens": [ + 6313, + 1506, + 385, + 287, + 6, + 22824, + 14540, + 1254, + 425, + 526, + 5173, + 2788, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.745, + "words": [ + { + "text": "Mais", + "start": 1.66, + "end": 1.9, + "confidence": 0.956 + }, + { + "text": "je", + "start": 1.9, + "end": 2.18, + "confidence": 0.629 + }, + { + "text": "me", + "start": 2.18, + "end": 2.3, + "confidence": 0.943 + }, + { + "text": "l'étais", + "start": 2.3, + "end": 
2.54, + "confidence": 0.708 + }, + { + "text": "jamais", + "start": 2.54, + "end": 2.78, + "confidence": 0.962 + }, + { + "text": "formulé", + "start": 2.78, + "end": 3.2, + "confidence": 0.541 + }, + { + "text": "comme", + "start": 3.2, + "end": 3.34, + "confidence": 0.975 + }, + { + "text": "ça.", + "start": 3.34, + "end": 3.62, + "confidence": 0.979 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 4.14, + "end": 8.82, + "text": " Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière", + "tokens": [ + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 635, + 12713, + 2776, + 730, + 17290, + 3916, + 11, + 2420, + 635, + 22267 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.762, + "words": [ + { + "text": "Ce", + "start": 4.14, + "end": 4.22, + "confidence": 0.39 + }, + { + "text": "qui", + "start": 4.22, + "end": 4.34, + "confidence": 0.934 + }, + { + "text": "fait", + "start": 4.34, + "end": 4.46, + "confidence": 0.735 + }, + { + "text": "la", + "start": 4.46, + "end": 4.56, + "confidence": 0.988 + }, + { + "text": "force", + "start": 4.56, + "end": 4.96, + "confidence": 0.933 + }, + { + "text": "du", + "start": 4.96, + "end": 5.16, + "confidence": 0.936 + }, + { + "text": "smartphone,", + "start": 5.16, + "end": 5.74, + "confidence": 0.909 + }, + { + "text": "c'est", + "start": 5.74, + "end": 6.12, + "confidence": 0.871 + }, + { + "text": "pas", + "start": 6.12, + "end": 6.2, + "confidence": 0.982 + }, + { + "text": "seulement", + "start": 6.2, + "end": 6.52, + "confidence": 0.991 + }, + { + "text": "la", + "start": 6.52, + "end": 6.76, + "confidence": 0.627 + }, + { + "text": "cumulation", + "start": 6.76, + "end": 7.18, + "confidence": 0.679 + }, + { + "text": "des", + "start": 7.18, + "end": 7.54, + "confidence": 0.752 + }, + { + "text": 
"fonctions,", + "start": 7.54, + "end": 8.1, + "confidence": 0.826 + }, + { + "text": "mais", + "start": 8.1, + "end": 8.42, + "confidence": 0.511 + }, + { + "text": "la", + "start": 8.42, + "end": 8.58, + "confidence": 0.717 + }, + { + "text": "manière", + "start": 8.58, + "end": 8.82, + "confidence": 0.457 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 8.9, + "end": 10.84, + "text": " dans quelques interagues entraîne.", + "tokens": [ + 2680, + 16597, + 728, + 559, + 1247, + 22284, + 24741, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.3, + "words": [ + { + "text": "dans", + "start": 8.9, + "end": 9.04, + "confidence": 0.329 + }, + { + "text": "quelques", + "start": 9.04, + "end": 9.26, + "confidence": 0.282 + }, + { + "text": "interagues", + "start": 9.26, + "end": 10.18, + "confidence": 0.238 + }, + { + "text": "entraîne.", + "start": 10.18, + "end": 10.84, + "confidence": 0.421 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 11.04, + "end": 12.92, + "text": " Et il est d'ailleurs, c'est la photo c'est hyper convaincant.", + "tokens": [ + 3790, + 1930, + 871, + 274, + 6, + 19400, + 11, + 269, + 6, + 377, + 635, + 5052, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.625, + "words": [ + { + "text": "Et", + "start": 11.04, + "end": 11.1, + "confidence": 0.394 + }, + { + "text": "il", + "start": 11.1, + "end": 11.24, + "confidence": 0.128 + }, + { + "text": "est", + "start": 11.24, + "end": 11.38, + "confidence": 0.237 + }, + { + "text": "d'ailleurs,", + "start": 11.38, + "end": 11.62, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 11.62, + "end": 11.78, + "confidence": 0.879 + }, + { + "text": "la", + "start": 11.78, + "end": 11.82, + 
"confidence": 0.968 + }, + { + "text": "photo", + "start": 11.82, + "end": 11.92, + "confidence": 0.811 + }, + { + "text": "c'est", + "start": 11.92, + "end": 12.18, + "confidence": 0.792 + }, + { + "text": "hyper", + "start": 12.18, + "end": 12.38, + "confidence": 0.939 + }, + { + "text": "convaincant.", + "start": 12.38, + "end": 12.92, + "confidence": 0.494 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 13.26, + "end": 18.03, + "text": " Alors évidemment, il faudrait ajouter les interfaces les grand-attêtes qu'il a été beaucoup", + "tokens": [ + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 1512, + 2697, + 12, + 1591, + 38262, + 421, + 6, + 388, + 257, + 8862, + 8796 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.553, + "words": [ + { + "text": "Alors", + "start": 13.26, + "end": 13.48, + "confidence": 0.904 + }, + { + "text": "évidemment,", + "start": 13.48, + "end": 13.92, + "confidence": 0.785 + }, + { + "text": "il", + "start": 13.92, + "end": 14.4, + "confidence": 0.961 + }, + { + "text": "faudrait", + "start": 14.4, + "end": 14.76, + "confidence": 0.85 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.38, + "confidence": 0.874 + }, + { + "text": "les", + "start": 15.38, + "end": 15.62, + "confidence": 0.933 + }, + { + "text": "interfaces", + "start": 15.62, + "end": 15.86, + "confidence": 0.359 + }, + { + "text": "les", + "start": 15.86, + "end": 16.5, + "confidence": 0.414 + }, + { + "text": "grand-attêtes", + "start": 16.5, + "end": 16.94, + "confidence": 0.178 + }, + { + "text": "qu'il", + "start": 16.94, + "end": 17.18, + "confidence": 0.632 + }, + { + "text": "a", + "start": 17.18, + "end": 17.24, + "confidence": 0.969 + }, + { + "text": "été", + "start": 17.24, + "end": 17.5, + "confidence": 0.957 + }, + { + "text": "beaucoup", + "start": 17.5, + "end": 18.03, + "confidence": 0.572 + 
} + ] + }, + { + "id": 6, + "seek": 0, + "start": 18.03, + "end": 19.26, + "text": " très souvent ementionné.", + "tokens": [ + 5732, + 20847, + 846, + 1251, + 15055, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.541, + "words": [ + { + "text": "très", + "start": 18.03, + "end": 18.66, + "confidence": 0.959 + }, + { + "text": "souvent", + "start": 18.66, + "end": 18.82, + "confidence": 0.994 + }, + { + "text": "ementionné.", + "start": 18.82, + "end": 19.26, + "confidence": 0.365 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.86, + "end": 23.54, + "text": " Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs", + "tokens": [ + 6313, + 4428, + 11, + 1930, + 38694, + 8645, + 631, + 1512, + 1740, + 3324, + 6212, + 368, + 945, + 1567, + 17338, + 287, + 6, + 21210, + 11, + 1512, + 4792, + 13923, + 2156 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.648, + "words": [ + { + "text": "Mais", + "start": 19.86, + "end": 20.22, + "confidence": 0.975 + }, + { + "text": "bon,", + "start": 20.22, + "end": 20.56, + "confidence": 0.479 + }, + { + "text": "il", + "start": 20.56, + "end": 20.6, + "confidence": 0.978 + }, + { + "text": "faudrait", + "start": 20.6, + "end": 20.74, + "confidence": 0.753 + }, + { + "text": "que", + "start": 20.74, + "end": 20.92, + "confidence": 0.378 + }, + { + "text": "les", + "start": 20.92, + "end": 20.98, + "confidence": 0.182 + }, + { + "text": "profites", + "start": 20.98, + "end": 21.26, + "confidence": 0.622 + }, + { + "text": "aussi", + "start": 21.26, + "end": 21.7, + "confidence": 0.502 + }, + { + "text": "de", + "start": 21.7, + "end": 21.84, + "confidence": 0.468 + }, + { + "text": "20", + "start": 21.84, + "end": 22.08, + "confidence": 0.924 + }, + { + "text": 
"ans", + "start": 22.08, + "end": 22.28, + "confidence": 0.937 + }, + { + "text": "pendant", + "start": 22.28, + "end": 22.46, + "confidence": 0.903 + }, + { + "text": "l'été,", + "start": 22.46, + "end": 22.96, + "confidence": 0.499 + }, + { + "text": "les", + "start": 22.96, + "end": 23.04, + "confidence": 0.861 + }, + { + "text": "ordinateurs", + "start": 23.04, + "end": 23.54, + "confidence": 0.934 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 23.58, + "end": 25.26, + "text": " nous ont appris à piquer sur des icônes.", + "tokens": [ + 4666, + 6592, + 724, + 5714, + 1531, + 280, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.663, + "words": [ + { + "text": "nous", + "start": 23.58, + "end": 23.74, + "confidence": 0.833 + }, + { + "text": "ont", + "start": 23.74, + "end": 23.86, + "confidence": 0.976 + }, + { + "text": "appris", + "start": 23.86, + "end": 24.06, + "confidence": 0.947 + }, + { + "text": "à", + "start": 24.06, + "end": 24.24, + "confidence": 0.24 + }, + { + "text": "piquer", + "start": 24.24, + "end": 24.42, + "confidence": 0.45 + }, + { + "text": "sur", + "start": 24.42, + "end": 24.68, + "confidence": 0.749 + }, + { + "text": "des", + "start": 24.68, + "end": 24.8, + "confidence": 0.96 + }, + { + "text": "icônes.", + "start": 24.8, + "end": 25.26, + "confidence": 0.656 + } + ] + }, + { + "id": 9, + "seek": 2556, + "start": 25.58, + "end": 30.56, + "text": " C'est ce que le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible.", + "tokens": [ + 383, + 6, + 377, + 1769, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 
0.0006256799097172916, + "confidence": 0.643, + "words": [ + { + "text": "C'est", + "start": 25.58, + "end": 25.66, + "confidence": 0.449 + }, + { + "text": "ce", + "start": 25.66, + "end": 25.72, + "confidence": 0.491 + }, + { + "text": "que", + "start": 25.72, + "end": 25.94, + "confidence": 0.935 + }, + { + "text": "le", + "start": 25.94, + "end": 26.6, + "confidence": 0.377 + }, + { + "text": "smartphone", + "start": 26.6, + "end": 26.86, + "confidence": 0.977 + }, + { + "text": "ajoute", + "start": 26.86, + "end": 27.42, + "confidence": 0.813 + }, + { + "text": "le", + "start": 27.42, + "end": 27.64, + "confidence": 0.957 + }, + { + "text": "toucher,", + "start": 27.64, + "end": 28.06, + "confidence": 0.73 + }, + { + "text": "qui", + "start": 28.06, + "end": 28.18, + "confidence": 0.209 + }, + { + "text": "rend", + "start": 28.18, + "end": 28.34, + "confidence": 0.877 + }, + { + "text": "le", + "start": 28.34, + "end": 28.68, + "confidence": 0.991 + }, + { + "text": "contact", + "start": 28.68, + "end": 28.96, + "confidence": 0.854 + }, + { + "text": "plus", + "start": 28.96, + "end": 29.48, + "confidence": 0.865 + }, + { + "text": "direct,", + "start": 29.48, + "end": 29.96, + "confidence": 0.692 + }, + { + "text": "plus", + "start": 29.96, + "end": 30.24, + "confidence": 0.928 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.56, + "confidence": 0.332 + } + ] + }, + { + "id": 10, + "seek": 2556, + "start": 31.04, + "end": 34.34, + "text": " Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner", + "tokens": [ + 3790, + 9093, + 11, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.827, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.24, + "confidence": 0.914 + 
}, + { + "text": "puis,", + "start": 31.24, + "end": 31.34, + "confidence": 0.713 + }, + { + "text": "évidemment,", + "start": 31.34, + "end": 31.64, + "confidence": 0.378 + }, + { + "text": "il", + "start": 31.64, + "end": 31.78, + "confidence": 0.935 + }, + { + "text": "faudrait", + "start": 31.78, + "end": 31.88, + "confidence": 0.99 + }, + { + "text": "parler", + "start": 31.88, + "end": 32.12, + "confidence": 0.882 + }, + { + "text": "aussi", + "start": 32.12, + "end": 32.3, + "confidence": 0.889 + }, + { + "text": "des", + "start": 32.3, + "end": 32.44, + "confidence": 0.927 + }, + { + "text": "applications", + "start": 32.44, + "end": 32.78, + "confidence": 0.839 + }, + { + "text": "qui", + "start": 32.78, + "end": 33.16, + "confidence": 0.652 + }, + { + "text": "permettent", + "start": 33.16, + "end": 33.68, + "confidence": 0.951 + }, + { + "text": "de", + "start": 33.68, + "end": 33.9, + "confidence": 0.952 + }, + { + "text": "contourner", + "start": 33.9, + "end": 34.34, + "confidence": 0.787 + } + ] + }, + { + "id": 11, + "seek": 2556, + "start": 34.34, + "end": 37.72, + "text": " le côté tout flu de la navigation web pour aller directement en but.", + "tokens": [ + 476, + 18437, + 3486, + 5029, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 465, + 457, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.766, + "words": [ + { + "text": "le", + "start": 34.34, + "end": 34.52, + "confidence": 0.989 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.72, + "confidence": 0.983 + }, + { + "text": "tout", + "start": 34.72, + "end": 34.96, + "confidence": 0.954 + }, + { + "text": "flu", + "start": 34.96, + "end": 35.2, + "confidence": 0.548 + }, + { + "text": "de", + "start": 35.2, + "end": 35.42, + "confidence": 0.248 + }, + { + "text": "la", + "start": 35.42, + "end": 35.72, + "confidence": 0.902 + }, + { + "text": 
"navigation", + "start": 35.72, + "end": 36.04, + "confidence": 0.913 + }, + { + "text": "web", + "start": 36.04, + "end": 36.64, + "confidence": 0.93 + }, + { + "text": "pour", + "start": 36.64, + "end": 36.76, + "confidence": 0.746 + }, + { + "text": "aller", + "start": 36.76, + "end": 36.94, + "confidence": 0.992 + }, + { + "text": "directement", + "start": 36.94, + "end": 37.46, + "confidence": 0.986 + }, + { + "text": "en", + "start": 37.46, + "end": 37.68, + "confidence": 0.644 + }, + { + "text": "but.", + "start": 37.68, + "end": 37.72, + "confidence": 0.689 + } + ] + }, + { + "id": 12, + "seek": 2556, + "start": 37.72, + "end": 43.06, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas,", + "tokens": [ + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 5550, + 14964, + 11, + 465, + 38268, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.725, + "words": [ + { + "text": "Bref,", + "start": 37.72, + "end": 38.9, + "confidence": 0.967 + }, + { + "text": "tout", + "start": 38.9, + "end": 38.98, + "confidence": 0.786 + }, + { + "text": "ça,", + "start": 38.98, + "end": 39.36, + "confidence": 0.985 + }, + { + "text": "ce", + "start": 39.36, + "end": 39.68, + "confidence": 0.929 + }, + { + "text": "sont", + "start": 39.68, + "end": 39.84, + "confidence": 0.963 + }, + { + "text": "les", + "start": 39.84, + "end": 40.06, + "confidence": 0.976 + }, + { + "text": "conditions", + "start": 40.06, + "end": 40.54, + "confidence": 0.96 + }, + { + "text": "qui", + "start": 40.54, + "end": 40.9, + "confidence": 0.995 + }, + { + "text": "permettent", + "start": 40.9, + "end": 41.54, + "confidence": 0.986 + }, + { + "text": "de", + "start": 41.54, + "end": 41.58, + "confidence": 0.989 + }, + { + "text": "créer", + "start": 41.58, + "end": 41.92, + 
"confidence": 0.918 + }, + { + "text": "cette", + "start": 41.92, + "end": 42.3, + "confidence": 0.518 + }, + { + "text": "objet,", + "start": 42.3, + "end": 42.7, + "confidence": 0.213 + }, + { + "text": "en", + "start": 42.7, + "end": 42.8, + "confidence": 0.191 + }, + { + "text": "Nicolas,", + "start": 42.8, + "end": 43.06, + "confidence": 0.508 + } + ] + }, + { + "id": 13, + "seek": 2556, + "start": 43.1, + "end": 46.48, + "text": " dit qu'il est très fondablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 6176, + 421, + 6, + 388, + 871, + 5732, + 9557, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.768, + "words": [ + { + "text": "dit", + "start": 43.1, + "end": 43.52, + "confidence": 0.551 + }, + { + "text": "qu'il", + "start": 43.52, + "end": 43.68, + "confidence": 0.969 + }, + { + "text": "est", + "start": 43.68, + "end": 43.8, + "confidence": 0.851 + }, + { + "text": "très", + "start": 43.8, + "end": 43.94, + "confidence": 0.368 + }, + { + "text": "fondablement", + "start": 43.94, + "end": 44.68, + "confidence": 0.593 + }, + { + "text": "inédit", + "start": 44.68, + "end": 45.52, + "confidence": 0.66 + }, + { + "text": "dans", + "start": 45.52, + "end": 45.74, + "confidence": 0.778 + }, + { + "text": "l'histoire", + "start": 45.74, + "end": 45.98, + "confidence": 0.825 + }, + { + "text": "de", + "start": 45.98, + "end": 46.08, + "confidence": 0.978 + }, + { + "text": "l'humanité.", + "start": 46.08, + "end": 46.48, + "confidence": 0.991 + } + ] + }, + { + "id": 14, + "seek": 2556, + "start": 47.06, + "end": 48.76, + "text": " Mais ça s'assoulait d'une autre interrogation.", + "tokens": [ + 6313, + 2788, + 262, + 6, + 640, + 263, + 35235, + 274, + 6, + 2613, + 15081, + 24871, + 399, + 13 + ], + "temperature": 0.0, + 
"avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.697, + "words": [ + { + "text": "Mais", + "start": 47.06, + "end": 47.16, + "confidence": 0.939 + }, + { + "text": "ça", + "start": 47.16, + "end": 47.46, + "confidence": 0.854 + }, + { + "text": "s'assoulait", + "start": 47.46, + "end": 47.78, + "confidence": 0.591 + }, + { + "text": "d'une", + "start": 47.78, + "end": 48.02, + "confidence": 0.619 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.14, + "confidence": 0.966 + }, + { + "text": "interrogation.", + "start": 48.14, + "end": 48.76, + "confidence": 0.833 + } + ] + }, + { + "id": 15, + "seek": 2556, + "start": 49.26, + "end": 54.34, + "text": " Est-ce que le fait que cette objet soit inédit un d'huies que notre rapport a lui est aussi", + "tokens": [ + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 5550, + 14964, + 12703, + 294, + 7811, + 270, + 517, + 274, + 6, + 12086, + 530, + 631, + 10349, + 18018, + 257, + 8783, + 871, + 6212 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.697, + "words": [ + { + "text": "Est-ce", + "start": 49.26, + "end": 49.7, + "confidence": 0.804 + }, + { + "text": "que", + "start": 49.7, + "end": 49.74, + "confidence": 0.989 + }, + { + "text": "le", + "start": 49.74, + "end": 49.78, + "confidence": 0.882 + }, + { + "text": "fait", + "start": 49.78, + "end": 49.96, + "confidence": 0.962 + }, + { + "text": "que", + "start": 49.96, + "end": 50.08, + "confidence": 0.935 + }, + { + "text": "cette", + "start": 50.08, + "end": 50.24, + "confidence": 0.95 + }, + { + "text": "objet", + "start": 50.24, + "end": 50.5, + "confidence": 0.968 + }, + { + "text": "soit", + "start": 50.5, + "end": 50.96, + "confidence": 0.991 + }, + { + "text": "inédit", + "start": 50.96, + "end": 51.78, + "confidence": 0.91 + }, + { + "text": 
"un", + "start": 51.78, + "end": 52.06, + "confidence": 0.442 + }, + { + "text": "d'huies", + "start": 52.06, + "end": 52.32, + "confidence": 0.298 + }, + { + "text": "que", + "start": 52.32, + "end": 52.4, + "confidence": 0.966 + }, + { + "text": "notre", + "start": 52.4, + "end": 52.58, + "confidence": 0.992 + }, + { + "text": "rapport", + "start": 52.58, + "end": 53.1, + "confidence": 0.714 + }, + { + "text": "a", + "start": 53.1, + "end": 53.44, + "confidence": 0.522 + }, + { + "text": "lui", + "start": 53.44, + "end": 53.62, + "confidence": 0.659 + }, + { + "text": "est", + "start": 53.62, + "end": 54.0, + "confidence": 0.643 + }, + { + "text": "aussi", + "start": 54.0, + "end": 54.34, + "confidence": 0.808 + } + ] + }, + { + "id": 16, + "seek": 2556, + "start": 54.34, + "end": 55.32, + "text": " un rapport inédit?", + "tokens": [ + 517, + 18018, + 294, + 7811, + 270, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.849, + "words": [ + { + "text": "un", + "start": 54.34, + "end": 54.72, + "confidence": 0.506 + }, + { + "text": "rapport", + "start": 54.72, + "end": 54.94, + "confidence": 0.999 + }, + { + "text": "inédit?", + "start": 54.94, + "end": 55.32, + "confidence": 0.956 + } + ] + }, + { + "id": 17, + "seek": 5548, + "start": 55.6, + "end": 58.6, + "text": " Je veux dire, est-ce que le rapport qu'on a au sein de foi n'est comparable à celui", + "tokens": [ + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 6195, + 368, + 6901, + 297, + 6, + 377, + 6311, + 712, + 1531, + 22829 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.626, + "words": [ + { + "text": "Je", + "start": 55.6, + "end": 55.84, + "confidence": 0.335 + }, + { + "text": "veux", + "start": 
55.84, + "end": 55.94, + "confidence": 0.432 + }, + { + "text": "dire,", + "start": 55.94, + "end": 56.16, + "confidence": 0.992 + }, + { + "text": "est-ce", + "start": 56.16, + "end": 56.34, + "confidence": 0.951 + }, + { + "text": "que", + "start": 56.34, + "end": 56.4, + "confidence": 0.973 + }, + { + "text": "le", + "start": 56.4, + "end": 56.54, + "confidence": 0.987 + }, + { + "text": "rapport", + "start": 56.54, + "end": 56.76, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 56.76, + "end": 57.1, + "confidence": 0.906 + }, + { + "text": "a", + "start": 57.1, + "end": 57.14, + "confidence": 0.974 + }, + { + "text": "au", + "start": 57.14, + "end": 57.26, + "confidence": 0.319 + }, + { + "text": "sein", + "start": 57.26, + "end": 57.36, + "confidence": 0.244 + }, + { + "text": "de", + "start": 57.36, + "end": 57.44, + "confidence": 0.177 + }, + { + "text": "foi", + "start": 57.44, + "end": 57.54, + "confidence": 0.163 + }, + { + "text": "n'est", + "start": 57.54, + "end": 57.78, + "confidence": 0.777 + }, + { + "text": "comparable", + "start": 57.78, + "end": 58.32, + "confidence": 0.606 + }, + { + "text": "à", + "start": 58.32, + "end": 58.46, + "confidence": 0.482 + }, + { + "text": "celui", + "start": 58.46, + "end": 58.6, + "confidence": 0.831 + } + ] + }, + { + "id": 18, + "seek": 5548, + "start": 58.72, + "end": 62.81, + "text": " qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone?", + "tokens": [ + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 2657, + 279, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.781, + "words": [ + { + "text": "qu'on", + "start": 58.72, + "end": 58.88, + "confidence": 0.941 + }, + { + "text": "entretenait", + "start": 58.88, + "end": 59.38, + "confidence": 0.657 + }, + { + 
"text": "à", + "start": 59.38, + "end": 59.44, + "confidence": 0.973 + }, + { + "text": "d'autres", + "start": 59.44, + "end": 59.64, + "confidence": 0.909 + }, + { + "text": "objectes", + "start": 59.64, + "end": 60.02, + "confidence": 0.528 + }, + { + "text": "techniques", + "start": 60.02, + "end": 60.38, + "confidence": 0.547 + }, + { + "text": "comme", + "start": 60.38, + "end": 60.86, + "confidence": 0.662 + }, + { + "text": "la", + "start": 60.86, + "end": 61.46, + "confidence": 0.898 + }, + { + "text": "voiture", + "start": 61.46, + "end": 61.8, + "confidence": 0.954 + }, + { + "text": "ou", + "start": 61.8, + "end": 62.32, + "confidence": 0.74 + }, + { + "text": "le", + "start": 62.32, + "end": 62.6, + "confidence": 0.847 + }, + { + "text": "téléphone?", + "start": 62.6, + "end": 62.81, + "confidence": 0.979 + } + ] + }, + { + "id": 19, + "seek": 5548, + "start": 62.81, + "end": 66.07, + "text": " Il n'y a pas d'équivalent.", + "tokens": [ + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.882, + "words": [ + { + "text": "Il", + "start": 62.81, + "end": 65.42, + "confidence": 0.866 + }, + { + "text": "n'y", + "start": 65.42, + "end": 65.52, + "confidence": 0.865 + }, + { + "text": "a", + "start": 65.52, + "end": 65.56, + "confidence": 0.965 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.62, + "confidence": 0.998 + }, + { + "text": "d'équivalent.", + "start": 65.62, + "end": 66.07, + "confidence": 0.858 + } + ] + }, + { + "id": 20, + "seek": 5548, + "start": 66.07, + "end": 69.74, + "text": " On s'est espécie de nous voter dans la relation à l'objet.", + "tokens": [ + 1282, + 262, + 6, + 377, + 7089, + 526, + 4260, + 368, + 4666, + 21722, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": 
-0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.552, + "words": [ + { + "text": "On", + "start": 66.07, + "end": 67.0, + "confidence": 0.29 + }, + { + "text": "s'est", + "start": 67.0, + "end": 67.26, + "confidence": 0.562 + }, + { + "text": "espécie", + "start": 67.26, + "end": 67.46, + "confidence": 0.233 + }, + { + "text": "de", + "start": 67.46, + "end": 67.58, + "confidence": 0.963 + }, + { + "text": "nous", + "start": 67.58, + "end": 67.8, + "confidence": 0.631 + }, + { + "text": "voter", + "start": 67.8, + "end": 68.24, + "confidence": 0.241 + }, + { + "text": "dans", + "start": 68.24, + "end": 68.68, + "confidence": 0.853 + }, + { + "text": "la", + "start": 68.68, + "end": 68.88, + "confidence": 0.61 + }, + { + "text": "relation", + "start": 68.88, + "end": 69.22, + "confidence": 0.932 + }, + { + "text": "à", + "start": 69.22, + "end": 69.34, + "confidence": 0.792 + }, + { + "text": "l'objet.", + "start": 69.34, + "end": 69.74, + "confidence": 0.889 + } + ] + }, + { + "id": 21, + "seek": 5548, + "start": 70.18, + "end": 74.78, + "text": " C'est facilement éterréciant parce qu'on a impression de, comme le 10, les utilisateurs", + "tokens": [ + 383, + 6, + 377, + 23670, + 518, + 1136, + 391, + 10521, + 537, + 394, + 6992, + 421, + 6, + 266, + 257, + 9995, + 368, + 11, + 5173, + 476, + 1266, + 11, + 1512, + 33643, + 25929 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.491, + "words": [ + { + "text": "C'est", + "start": 70.18, + "end": 70.34, + "confidence": 0.907 + }, + { + "text": "facilement", + "start": 70.34, + "end": 70.72, + "confidence": 0.527 + }, + { + "text": "éterréciant", + "start": 70.72, + "end": 71.56, + "confidence": 0.277 + }, + { + "text": "parce", + "start": 71.56, + "end": 71.82, + "confidence": 0.298 + }, + { + "text": "qu'on", + "start": 
71.82, + "end": 72.3, + "confidence": 0.775 + }, + { + "text": "a", + "start": 72.3, + "end": 72.4, + "confidence": 0.563 + }, + { + "text": "impression", + "start": 72.4, + "end": 72.74, + "confidence": 0.148 + }, + { + "text": "de,", + "start": 72.74, + "end": 73.36, + "confidence": 0.211 + }, + { + "text": "comme", + "start": 73.36, + "end": 73.8, + "confidence": 0.843 + }, + { + "text": "le", + "start": 73.8, + "end": 73.96, + "confidence": 0.9 + }, + { + "text": "10,", + "start": 73.96, + "end": 74.28, + "confidence": 0.395 + }, + { + "text": "les", + "start": 74.28, + "end": 74.32, + "confidence": 0.581 + }, + { + "text": "utilisateurs", + "start": 74.32, + "end": 74.78, + "confidence": 0.745 + } + ] + }, + { + "id": 22, + "seek": 5548, + "start": 74.8, + "end": 77.93, + "text": " et les efforts, elles aident dépendant de cette objet d'un lieu, en fait, une espèce de", + "tokens": [ + 1030, + 1512, + 6484, + 11, + 23576, + 257, + 1078, + 45768, + 394, + 368, + 5550, + 14964, + 274, + 6, + 409, + 26036, + 11, + 465, + 3887, + 11, + 2251, + 7089, + 30236, + 368 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.464, + "words": [ + { + "text": "et", + "start": 74.8, + "end": 74.92, + "confidence": 0.147 + }, + { + "text": "les", + "start": 74.92, + "end": 74.96, + "confidence": 0.242 + }, + { + "text": "efforts,", + "start": 74.96, + "end": 75.22, + "confidence": 0.108 + }, + { + "text": "elles", + "start": 75.22, + "end": 75.32, + "confidence": 0.108 + }, + { + "text": "aident", + "start": 75.32, + "end": 75.44, + "confidence": 0.289 + }, + { + "text": "dépendant", + "start": 75.44, + "end": 76.0, + "confidence": 0.576 + }, + { + "text": "de", + "start": 76.0, + "end": 76.16, + "confidence": 0.301 + }, + { + "text": "cette", + "start": 76.16, + "end": 76.2, + "confidence": 0.519 + }, + { + "text": "objet", + "start": 76.2, + "end": 76.48, + 
"confidence": 0.961 + }, + { + "text": "d'un", + "start": 76.48, + "end": 76.86, + "confidence": 0.883 + }, + { + "text": "lieu,", + "start": 76.86, + "end": 77.06, + "confidence": 0.333 + }, + { + "text": "en", + "start": 77.06, + "end": 77.18, + "confidence": 0.837 + }, + { + "text": "fait,", + "start": 77.18, + "end": 77.42, + "confidence": 0.958 + }, + { + "text": "une", + "start": 77.42, + "end": 77.46, + "confidence": 0.57 + }, + { + "text": "espèce", + "start": 77.46, + "end": 77.74, + "confidence": 0.961 + }, + { + "text": "de", + "start": 77.74, + "end": 77.93, + "confidence": 0.601 + } + ] + }, + { + "id": 23, + "seek": 5548, + "start": 77.93, + "end": 82.98, + "text": " relation de médiation avec le monde qui rendent un peu avec la même sédiforme de", + "tokens": [ + 9721, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 317, + 517, + 5604, + 4163, + 635, + 5698, + 262, + 7811, + 8629, + 68, + 368 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.556, + "words": [ + { + "text": "relation", + "start": 77.93, + "end": 78.32, + "confidence": 0.839 + }, + { + "text": "de", + "start": 78.32, + "end": 78.7, + "confidence": 0.723 + }, + { + "text": "médiation", + "start": 78.7, + "end": 79.4, + "confidence": 0.872 + }, + { + "text": "avec", + "start": 79.4, + "end": 79.74, + "confidence": 0.964 + }, + { + "text": "le", + "start": 79.74, + "end": 79.88, + "confidence": 0.985 + }, + { + "text": "monde", + "start": 79.88, + "end": 80.24, + "confidence": 0.909 + }, + { + "text": "qui", + "start": 80.24, + "end": 81.02, + "confidence": 0.871 + }, + { + "text": "rendent", + "start": 81.02, + "end": 81.74, + "confidence": 0.623 + }, + { + "text": "un", + "start": 81.74, + "end": 81.84, + "confidence": 0.224 + }, + { + "text": "peu", + "start": 81.84, + "end": 81.88, + "confidence": 0.223 + }, + { + "text": "avec", + "start": 81.88, + 
"end": 82.1, + "confidence": 0.863 + }, + { + "text": "la", + "start": 82.1, + "end": 82.24, + "confidence": 0.565 + }, + { + "text": "même", + "start": 82.24, + "end": 82.32, + "confidence": 0.384 + }, + { + "text": "sédiforme", + "start": 82.32, + "end": 82.86, + "confidence": 0.29 + }, + { + "text": "de", + "start": 82.86, + "end": 82.98, + "confidence": 0.899 + } + ] + }, + { + "id": 24, + "seek": 8298, + "start": 83.0, + "end": 87.66, + "text": " le jeu. Donc, à objets inédits, rapport inédits.", + "tokens": [ + 476, + 16748, + 13, + 7477, + 11, + 1531, + 1111, + 25349, + 294, + 7811, + 1208, + 11, + 18018, + 294, + 7811, + 1208, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.614, + "words": [ + { + "text": "le", + "start": 83.0, + "end": 83.12, + "confidence": 0.128 + }, + { + "text": "jeu.", + "start": 83.12, + "end": 83.64, + "confidence": 0.505 + }, + { + "text": "Donc,", + "start": 83.64, + "end": 84.48, + "confidence": 0.513 + }, + { + "text": "à", + "start": 84.48, + "end": 84.86, + "confidence": 0.634 + }, + { + "text": "objets", + "start": 84.86, + "end": 85.36, + "confidence": 0.547 + }, + { + "text": "inédits,", + "start": 85.36, + "end": 86.24, + "confidence": 0.73 + }, + { + "text": "rapport", + "start": 86.24, + "end": 86.62, + "confidence": 0.928 + }, + { + "text": "inédits.", + "start": 86.62, + "end": 87.66, + "confidence": 0.921 + } + ] + }, + { + "id": 25, + "seek": 8298, + "start": 88.08, + "end": 93.78, + "text": " Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépenses", + "tokens": [ + 3790, + 11, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 46750, + 38268, + 11, + 431, + 4212, + 1032, + 578, + 4198, + 50027, + 971, + 517, + 41953, + 933, + 368, + 27998, + 9085 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + 
"no_speech_prob": 3.121094050584361e-05, + "confidence": 0.624, + "words": [ + { + "text": "Et,", + "start": 88.08, + "end": 88.3, + "confidence": 0.87 + }, + { + "text": "ce", + "start": 88.3, + "end": 88.8, + "confidence": 0.462 + }, + { + "text": "rapport,", + "start": 88.8, + "end": 89.28, + "confidence": 0.997 + }, + { + "text": "si", + "start": 89.28, + "end": 89.56, + "confidence": 0.913 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.74, + "confidence": 0.771 + }, + { + "text": "prends", + "start": 89.74, + "end": 89.84, + "confidence": 0.313 + }, + { + "text": "Nicolas,", + "start": 89.84, + "end": 90.54, + "confidence": 0.358 + }, + { + "text": "frère", + "start": 90.54, + "end": 91.06, + "confidence": 0.405 + }, + { + "text": "caractérisée", + "start": 91.06, + "end": 91.7, + "confidence": 0.567 + }, + { + "text": "par", + "start": 91.7, + "end": 92.12, + "confidence": 0.868 + }, + { + "text": "un", + "start": 92.12, + "end": 92.32, + "confidence": 0.989 + }, + { + "text": "mélange", + "start": 92.32, + "end": 92.96, + "confidence": 0.932 + }, + { + "text": "de", + "start": 92.96, + "end": 93.24, + "confidence": 0.812 + }, + { + "text": "dépenses", + "start": 93.24, + "end": 93.78, + "confidence": 0.404 + } + ] + }, + { + "id": 26, + "seek": 8298, + "start": 94.36, + "end": 94.98, + "text": " et de rojets.", + "tokens": [ + 1030, + 368, + 744, + 73, + 1385, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.583, + "words": [ + { + "text": "et", + "start": 94.36, + "end": 94.52, + "confidence": 0.986 + }, + { + "text": "de", + "start": 94.52, + "end": 94.56, + "confidence": 0.996 + }, + { + "text": "rojets.", + "start": 94.56, + "end": 94.98, + "confidence": 0.41 + } + ] + }, + { + "id": 27, + "seek": 8298, + "start": 95.8, + "end": 100.4, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des 
objectes", + "tokens": [ + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 2657, + 279 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.749, + "words": [ + { + "text": "Bon,", + "start": 95.8, + "end": 96.06, + "confidence": 0.71 + }, + { + "text": "en", + "start": 96.06, + "end": 96.54, + "confidence": 0.92 + }, + { + "text": "vrai,", + "start": 96.54, + "end": 96.88, + "confidence": 0.994 + }, + { + "text": "il", + "start": 96.88, + "end": 97.1, + "confidence": 0.985 + }, + { + "text": "faudrait", + "start": 97.1, + "end": 97.5, + "confidence": 0.932 + }, + { + "text": "remonter", + "start": 97.5, + "end": 98.02, + "confidence": 0.574 + }, + { + "text": "très", + "start": 98.02, + "end": 98.46, + "confidence": 0.984 + }, + { + "text": "très", + "start": 98.46, + "end": 98.76, + "confidence": 0.597 + }, + { + "text": "finement", + "start": 98.76, + "end": 99.36, + "confidence": 0.476 + }, + { + "text": "toute", + "start": 99.36, + "end": 99.68, + "confidence": 0.367 + }, + { + "text": "l'histoire", + "start": 99.68, + "end": 100.02, + "confidence": 0.909 + }, + { + "text": "des", + "start": 100.02, + "end": 100.2, + "confidence": 0.949 + }, + { + "text": "objectes", + "start": 100.2, + "end": 100.4, + "confidence": 0.805 + } + ] + }, + { + "id": 28, + "seek": 8298, + "start": 100.4, + "end": 105.14, + "text": " techniques et de leur infération dans le vie pour déterminer si ce rapport est totalement", + "tokens": [ + 7512, + 1030, + 368, + 9580, + 1536, + 526, + 2405, + 2680, + 476, + 4941, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.665, + "words": [ + { + 
"text": "techniques", + "start": 100.4, + "end": 101.02, + "confidence": 0.954 + }, + { + "text": "et", + "start": 101.02, + "end": 101.52, + "confidence": 0.967 + }, + { + "text": "de", + "start": 101.52, + "end": 101.64, + "confidence": 0.973 + }, + { + "text": "leur", + "start": 101.64, + "end": 101.78, + "confidence": 0.811 + }, + { + "text": "infération", + "start": 101.78, + "end": 102.22, + "confidence": 0.227 + }, + { + "text": "dans", + "start": 102.22, + "end": 102.46, + "confidence": 0.518 + }, + { + "text": "le", + "start": 102.46, + "end": 102.54, + "confidence": 0.511 + }, + { + "text": "vie", + "start": 102.54, + "end": 102.78, + "confidence": 0.612 + }, + { + "text": "pour", + "start": 102.78, + "end": 103.06, + "confidence": 0.933 + }, + { + "text": "déterminer", + "start": 103.06, + "end": 103.64, + "confidence": 0.954 + }, + { + "text": "si", + "start": 103.64, + "end": 103.74, + "confidence": 0.488 + }, + { + "text": "ce", + "start": 103.74, + "end": 103.86, + "confidence": 0.98 + }, + { + "text": "rapport", + "start": 103.86, + "end": 104.1, + "confidence": 0.997 + }, + { + "text": "est", + "start": 104.1, + "end": 104.88, + "confidence": 0.942 + }, + { + "text": "totalement", + "start": 104.88, + "end": 105.14, + "confidence": 0.854 + } + ] + }, + { + "id": 29, + "seek": 8298, + "start": 105.26, + "end": 105.78, + "text": " inédit.", + "tokens": [ + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.943, + "words": [ + { + "text": "inédit.", + "start": 105.26, + "end": 105.78, + "confidence": 0.943 + } + ] + }, + { + "id": 30, + "seek": 8298, + "start": 106.14, + "end": 109.36, + "text": " Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment.", + "tokens": [ + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13 + 
], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.798, + "words": [ + { + "text": "Mais", + "start": 106.14, + "end": 106.34, + "confidence": 0.872 + }, + { + "text": "j'ai", + "start": 106.34, + "end": 106.9, + "confidence": 0.924 + }, + { + "text": "l'impression", + "start": 106.9, + "end": 107.28, + "confidence": 0.967 + }, + { + "text": "comme", + "start": 107.28, + "end": 107.5, + "confidence": 0.709 + }, + { + "text": "ça", + "start": 107.5, + "end": 107.68, + "confidence": 0.955 + }, + { + "text": "que", + "start": 107.68, + "end": 107.96, + "confidence": 0.91 + }, + { + "text": "Nicolas", + "start": 107.96, + "end": 108.36, + "confidence": 0.986 + }, + { + "text": "se", + "start": 108.36, + "end": 108.66, + "confidence": 0.569 + }, + { + "text": "trompe", + "start": 108.66, + "end": 109.0, + "confidence": 0.532 + }, + { + "text": "pas", + "start": 109.0, + "end": 109.1, + "confidence": 0.715 + }, + { + "text": "vraiment.", + "start": 109.1, + "end": 109.36, + "confidence": 0.923 + } + ] + }, + { + "id": 31, + "seek": 10970, + "start": 109.72, + "end": 114.26, + "text": " Pour autant, je sache. 
Il y a eu plein de discussions autour de la voiture ou même", + "tokens": [ + 8732, + 34081, + 11, + 1506, + 262, + 6000, + 13, + 4416, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.829, + "words": [ + { + "text": "Pour", + "start": 109.72, + "end": 110.08, + "confidence": 0.92 + }, + { + "text": "autant,", + "start": 110.08, + "end": 110.36, + "confidence": 0.986 + }, + { + "text": "je", + "start": 110.36, + "end": 110.4, + "confidence": 0.894 + }, + { + "text": "sache.", + "start": 110.4, + "end": 110.86, + "confidence": 0.531 + }, + { + "text": "Il", + "start": 110.86, + "end": 111.16, + "confidence": 0.851 + }, + { + "text": "y", + "start": 111.16, + "end": 111.22, + "confidence": 0.872 + }, + { + "text": "a", + "start": 111.22, + "end": 111.28, + "confidence": 0.939 + }, + { + "text": "eu", + "start": 111.28, + "end": 111.38, + "confidence": 0.891 + }, + { + "text": "plein", + "start": 111.38, + "end": 111.76, + "confidence": 0.833 + }, + { + "text": "de", + "start": 111.76, + "end": 112.04, + "confidence": 0.94 + }, + { + "text": "discussions", + "start": 112.04, + "end": 112.46, + "confidence": 0.711 + }, + { + "text": "autour", + "start": 112.46, + "end": 112.96, + "confidence": 0.97 + }, + { + "text": "de", + "start": 112.96, + "end": 113.46, + "confidence": 0.952 + }, + { + "text": "la", + "start": 113.46, + "end": 113.5, + "confidence": 0.955 + }, + { + "text": "voiture", + "start": 113.5, + "end": 113.8, + "confidence": 0.983 + }, + { + "text": "ou", + "start": 113.8, + "end": 114.02, + "confidence": 0.523 + }, + { + "text": "même", + "start": 114.02, + "end": 114.26, + "confidence": 0.963 + } + ] + }, + { + "id": 32, + "seek": 10970, + "start": 114.42, + "end": 118.76, + "text": " du téléphone. 
Mais la dépense n'était pas du même mort, donc le rejet n'en", + "tokens": [ + 1581, + 47159, + 13, + 6313, + 635, + 27998, + 1288, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 11, + 5926, + 476, + 319, + 7108, + 297, + 6, + 268 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.732, + "words": [ + { + "text": "du", + "start": 114.42, + "end": 114.6, + "confidence": 0.677 + }, + { + "text": "téléphone.", + "start": 114.6, + "end": 115.16, + "confidence": 0.983 + }, + { + "text": "Mais", + "start": 115.16, + "end": 115.72, + "confidence": 0.649 + }, + { + "text": "la", + "start": 115.72, + "end": 116.0, + "confidence": 0.809 + }, + { + "text": "dépense", + "start": 116.0, + "end": 116.38, + "confidence": 0.766 + }, + { + "text": "n'était", + "start": 116.38, + "end": 116.62, + "confidence": 0.952 + }, + { + "text": "pas", + "start": 116.62, + "end": 117.02, + "confidence": 0.994 + }, + { + "text": "du", + "start": 117.02, + "end": 117.16, + "confidence": 0.98 + }, + { + "text": "même", + "start": 117.16, + "end": 117.32, + "confidence": 0.944 + }, + { + "text": "mort,", + "start": 117.32, + "end": 117.66, + "confidence": 0.519 + }, + { + "text": "donc", + "start": 117.66, + "end": 117.78, + "confidence": 0.871 + }, + { + "text": "le", + "start": 117.78, + "end": 118.32, + "confidence": 0.932 + }, + { + "text": "rejet", + "start": 118.32, + "end": 118.62, + "confidence": 0.524 + }, + { + "text": "n'en", + "start": 118.62, + "end": 118.76, + "confidence": 0.472 + } + ] + }, + { + "id": 33, + "seek": 10970, + "start": 118.76, + "end": 119.66, + "text": " plus n'était pas du même mort.", + "tokens": [ + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.853, + "words": 
[ + { + "text": "plus", + "start": 118.76, + "end": 118.9, + "confidence": 0.313 + }, + { + "text": "n'était", + "start": 118.9, + "end": 119.1, + "confidence": 0.973 + }, + { + "text": "pas", + "start": 119.1, + "end": 119.26, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.26, + "end": 119.36, + "confidence": 0.988 + }, + { + "text": "même", + "start": 119.36, + "end": 119.5, + "confidence": 0.999 + }, + { + "text": "mort.", + "start": 119.5, + "end": 119.66, + "confidence": 0.984 + } + ] + }, + { + "id": 34, + "seek": 10970, + "start": 120.06, + "end": 122.94, + "text": " On peut adorer sa bagnure, en avoir besoin pour plein de choses.", + "tokens": [ + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 77, + 540, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.753, + "words": [ + { + "text": "On", + "start": 120.06, + "end": 120.22, + "confidence": 0.969 + }, + { + "text": "peut", + "start": 120.22, + "end": 120.32, + "confidence": 0.988 + }, + { + "text": "adorer", + "start": 120.32, + "end": 120.64, + "confidence": 0.854 + }, + { + "text": "sa", + "start": 120.64, + "end": 120.86, + "confidence": 0.918 + }, + { + "text": "bagnure,", + "start": 120.86, + "end": 121.38, + "confidence": 0.368 + }, + { + "text": "en", + "start": 121.38, + "end": 121.52, + "confidence": 0.56 + }, + { + "text": "avoir", + "start": 121.52, + "end": 121.66, + "confidence": 0.969 + }, + { + "text": "besoin", + "start": 121.66, + "end": 122.08, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 122.08, + "end": 122.34, + "confidence": 0.92 + }, + { + "text": "plein", + "start": 122.34, + "end": 122.64, + "confidence": 0.9 + }, + { + "text": "de", + "start": 122.64, + "end": 122.78, + "confidence": 0.993 + }, + { + "text": "choses.", + "start": 122.78, + "end": 122.94, + 
"confidence": 0.994 + } + ] + }, + { + "id": 35, + "seek": 10970, + "start": 123.36, + "end": 126.38, + "text": " Et là, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 3790, + 3684, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.826, + "words": [ + { + "text": "Et", + "start": 123.36, + "end": 123.48, + "confidence": 0.557 + }, + { + "text": "là,", + "start": 123.48, + "end": 123.66, + "confidence": 0.527 + }, + { + "text": "le", + "start": 123.66, + "end": 124.02, + "confidence": 0.986 + }, + { + "text": "soir,", + "start": 124.02, + "end": 124.5, + "confidence": 0.966 + }, + { + "text": "quand", + "start": 124.5, + "end": 124.9, + "confidence": 0.774 + }, + { + "text": "on", + "start": 124.9, + "end": 125.02, + "confidence": 0.986 + }, + { + "text": "va", + "start": 125.02, + "end": 125.1, + "confidence": 0.968 + }, + { + "text": "se", + "start": 125.1, + "end": 125.2, + "confidence": 0.862 + }, + { + "text": "coucher,", + "start": 125.2, + "end": 125.64, + "confidence": 0.791 + }, + { + "text": "on", + "start": 125.64, + "end": 126.04, + "confidence": 0.974 + }, + { + "text": "la", + "start": 126.04, + "end": 126.12, + "confidence": 0.783 + }, + { + "text": "laisse.", + "start": 126.12, + "end": 126.38, + "confidence": 0.981 + } + ] + }, + { + "id": 36, + "seek": 10970, + "start": 127.06, + "end": 130.0, + "text": " On l'a pas dans la main, quand on est collis, quand on n'en mène pas au chiot.", + "tokens": [ + 1282, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 11, + 6932, + 322, + 871, + 1263, + 271, + 11, + 6932, + 322, + 297, + 6, + 268, + 275, + 18832, + 1736, + 1609, + 417, + 6471, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + 
"no_speech_prob": 9.048193533089943e-06, + "confidence": 0.66, + "words": [ + { + "text": "On", + "start": 127.06, + "end": 127.3, + "confidence": 0.742 + }, + { + "text": "l'a", + "start": 127.3, + "end": 127.46, + "confidence": 0.714 + }, + { + "text": "pas", + "start": 127.46, + "end": 127.64, + "confidence": 0.99 + }, + { + "text": "dans", + "start": 127.64, + "end": 127.82, + "confidence": 0.967 + }, + { + "text": "la", + "start": 127.82, + "end": 127.94, + "confidence": 0.956 + }, + { + "text": "main,", + "start": 127.94, + "end": 128.3, + "confidence": 0.978 + }, + { + "text": "quand", + "start": 128.3, + "end": 128.44, + "confidence": 0.967 + }, + { + "text": "on", + "start": 128.44, + "end": 128.6, + "confidence": 0.993 + }, + { + "text": "est", + "start": 128.6, + "end": 128.66, + "confidence": 0.602 + }, + { + "text": "collis,", + "start": 128.66, + "end": 129.2, + "confidence": 0.346 + }, + { + "text": "quand", + "start": 129.2, + "end": 129.24, + "confidence": 0.524 + }, + { + "text": "on", + "start": 129.24, + "end": 129.32, + "confidence": 0.991 + }, + { + "text": "n'en", + "start": 129.32, + "end": 129.38, + "confidence": 0.537 + }, + { + "text": "mène", + "start": 129.38, + "end": 129.56, + "confidence": 0.434 + }, + { + "text": "pas", + "start": 129.56, + "end": 129.72, + "confidence": 0.998 + }, + { + "text": "au", + "start": 129.72, + "end": 129.82, + "confidence": 0.811 + }, + { + "text": "chiot.", + "start": 129.82, + "end": 130.0, + "confidence": 0.473 + } + ] + }, + { + "id": 37, + "seek": 10970, + "start": 130.88, + "end": 135.1, + "text": " On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une", + "tokens": [ + 1282, + 45913, + 7418, + 1136, + 936, + 15797, + 971, + 1872, + 275, + 423, + 1956, + 2678, + 84, + 494, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 
9.048193533089943e-06, + "confidence": 0.672, + "words": [ + { + "text": "On", + "start": 130.88, + "end": 131.0, + "confidence": 0.991 + }, + { + "text": "pouvait", + "start": 131.0, + "end": 131.24, + "confidence": 0.532 + }, + { + "text": "être", + "start": 131.24, + "end": 131.7, + "confidence": 0.545 + }, + { + "text": "émervé", + "start": 131.7, + "end": 132.2, + "confidence": 0.725 + }, + { + "text": "par", + "start": 132.2, + "end": 132.42, + "confidence": 0.82 + }, + { + "text": "son", + "start": 132.42, + "end": 132.68, + "confidence": 0.448 + }, + { + "text": "mome", + "start": 132.68, + "end": 133.04, + "confidence": 0.261 + }, + { + "text": "qui", + "start": 133.04, + "end": 133.26, + "confidence": 0.901 + }, + { + "text": "occupeait", + "start": 133.26, + "end": 133.72, + "confidence": 0.641 + }, + { + "text": "la", + "start": 133.72, + "end": 133.8, + "confidence": 0.8 + }, + { + "text": "ligne", + "start": 133.8, + "end": 134.0, + "confidence": 0.976 + }, + { + "text": "de", + "start": 134.0, + "end": 134.14, + "confidence": 0.964 + }, + { + "text": "téléphone", + "start": 134.14, + "end": 134.44, + "confidence": 0.977 + }, + { + "text": "pendant", + "start": 134.44, + "end": 134.8, + "confidence": 0.92 + }, + { + "text": "une", + "start": 134.8, + "end": 135.1, + "confidence": 0.838 + } + ] + }, + { + "id": 38, + "seek": 10970, + "start": 135.1, + "end": 136.84, + "text": " heure chaque soir pour discuter avec un copain.", + "tokens": [ + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.922, + "words": [ + { + "text": "heure", + "start": 135.1, + "end": 135.36, + "confidence": 0.701 + }, + { + "text": "chaque", + "start": 135.36, + "end": 135.52, + "confidence": 0.957 + }, + { + "text": "soir", + "start": 135.52, + "end": 135.76, + "confidence": 
0.997 + }, + { + "text": "pour", + "start": 135.76, + "end": 135.94, + "confidence": 0.983 + }, + { + "text": "discuter", + "start": 135.94, + "end": 136.26, + "confidence": 0.882 + }, + { + "text": "avec", + "start": 136.26, + "end": 136.44, + "confidence": 0.993 + }, + { + "text": "un", + "start": 136.44, + "end": 136.6, + "confidence": 0.976 + }, + { + "text": "copain.", + "start": 136.6, + "end": 136.84, + "confidence": 0.948 + } + ] + }, + { + "id": 39, + "seek": 13702, + "start": 137.26, + "end": 141.8, + "text": " Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui", + "tokens": [ + 6313, + 2788, + 408, + 725, + 37227, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 5698, + 275, + 423, + 14023, + 6, + 10556 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.716, + "words": [ + { + "text": "Mais", + "start": 137.26, + "end": 137.52, + "confidence": 0.967 + }, + { + "text": "ça", + "start": 137.52, + "end": 137.6, + "confidence": 0.859 + }, + { + "text": "ne", + "start": 137.6, + "end": 137.68, + "confidence": 0.681 + }, + { + "text": "ressemble", + "start": 137.68, + "end": 138.14, + "confidence": 0.743 + }, + { + "text": "pas", + "start": 138.14, + "end": 138.66, + "confidence": 0.524 + }, + { + "text": "à", + "start": 138.66, + "end": 138.94, + "confidence": 0.978 + }, + { + "text": "ce", + "start": 138.94, + "end": 139.02, + "confidence": 0.605 + }, + { + "text": "qu'on", + "start": 139.02, + "end": 139.16, + "confidence": 0.961 + }, + { + "text": "peut", + "start": 139.16, + "end": 139.3, + "confidence": 0.586 + }, + { + "text": "ressentir", + "start": 139.3, + "end": 140.04, + "confidence": 0.898 + }, + { + "text": "à", + "start": 140.04, + "end": 140.24, + "confidence": 0.352 + }, + { + "text": "voir", + "start": 140.24, + "end": 140.48, + "confidence": 0.918 + }, 
+ { + "text": "même", + "start": 140.48, + "end": 140.86, + "confidence": 0.324 + }, + { + "text": "mome", + "start": 140.86, + "end": 141.18, + "confidence": 0.374 + }, + { + "text": "aujourd'hui", + "start": 141.18, + "end": 141.8, + "confidence": 0.949 + } + ] + }, + { + "id": 40, + "seek": 13702, + "start": 141.92, + "end": 145.76, + "text": " continuuellement avec son smartphone dans la main, comme c'était une sorte de estimateur", + "tokens": [ + 2993, + 31816, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 8017, + 15540 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.584, + "words": [ + { + "text": "continuuellement", + "start": 141.92, + "end": 142.8, + "confidence": 0.327 + }, + { + "text": "avec", + "start": 142.8, + "end": 143.18, + "confidence": 0.903 + }, + { + "text": "son", + "start": 143.18, + "end": 143.36, + "confidence": 0.866 + }, + { + "text": "smartphone", + "start": 143.36, + "end": 143.64, + "confidence": 0.467 + }, + { + "text": "dans", + "start": 143.64, + "end": 143.92, + "confidence": 0.554 + }, + { + "text": "la", + "start": 143.92, + "end": 144.0, + "confidence": 0.972 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.26, + "confidence": 0.997 + }, + { + "text": "comme", + "start": 144.26, + "end": 144.52, + "confidence": 0.95 + }, + { + "text": "c'était", + "start": 144.52, + "end": 144.78, + "confidence": 0.574 + }, + { + "text": "une", + "start": 144.78, + "end": 144.94, + "confidence": 0.971 + }, + { + "text": "sorte", + "start": 144.94, + "end": 145.1, + "confidence": 0.642 + }, + { + "text": "de", + "start": 145.1, + "end": 145.18, + "confidence": 0.268 + }, + { + "text": "estimateur", + "start": 145.18, + "end": 145.76, + "confidence": 0.415 + } + ] + }, + { + "id": 41, + "seek": 13702, + "start": 145.94, + "end": 148.88, + "text": " extère 
de l'intempis de lâcher à l'éantrénée, ça m'a eu immédiate.", + "tokens": [ + 1279, + 4212, + 368, + 287, + 6, + 686, + 15970, + 271, + 368, + 48835, + 6759, + 1531, + 287, + 6, + 526, + 394, + 81, + 3516, + 3856, + 11, + 2788, + 275, + 6, + 64, + 2228, + 3397, + 526, + 4504, + 473, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.401, + "words": [ + { + "text": "extère", + "start": 145.94, + "end": 146.38, + "confidence": 0.357 + }, + { + "text": "de", + "start": 146.38, + "end": 146.46, + "confidence": 0.253 + }, + { + "text": "l'intempis", + "start": 146.46, + "end": 146.7, + "confidence": 0.153 + }, + { + "text": "de", + "start": 146.7, + "end": 146.9, + "confidence": 0.858 + }, + { + "text": "lâcher", + "start": 146.9, + "end": 147.32, + "confidence": 0.824 + }, + { + "text": "à", + "start": 147.32, + "end": 147.44, + "confidence": 0.494 + }, + { + "text": "l'éantrénée,", + "start": 147.44, + "end": 147.94, + "confidence": 0.476 + }, + { + "text": "ça", + "start": 147.94, + "end": 148.0, + "confidence": 0.772 + }, + { + "text": "m'a", + "start": 148.0, + "end": 148.26, + "confidence": 0.532 + }, + { + "text": "eu", + "start": 148.26, + "end": 148.42, + "confidence": 0.181 + }, + { + "text": "immédiate.", + "start": 148.42, + "end": 148.88, + "confidence": 0.537 + } + ] + }, + { + "id": 42, + "seek": 13702, + "start": 149.08, + "end": 152.02, + "text": " Bon, je dis ça pour le mome, mais évidemment, va là pour nos aussi.", + "tokens": [ + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 423, + 11, + 2420, + 24724, + 11, + 2773, + 3684, + 2016, + 3269, + 6212, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.529, + "words": [ + { + "text": "Bon,", + "start": 149.08, + "end": 149.3, + "confidence": 0.285 + 
}, + { + "text": "je", + "start": 149.3, + "end": 149.34, + "confidence": 0.425 + }, + { + "text": "dis", + "start": 149.34, + "end": 149.42, + "confidence": 0.316 + }, + { + "text": "ça", + "start": 149.42, + "end": 149.62, + "confidence": 0.953 + }, + { + "text": "pour", + "start": 149.62, + "end": 149.72, + "confidence": 0.971 + }, + { + "text": "le", + "start": 149.72, + "end": 149.82, + "confidence": 0.993 + }, + { + "text": "mome,", + "start": 149.82, + "end": 150.24, + "confidence": 0.575 + }, + { + "text": "mais", + "start": 150.24, + "end": 150.44, + "confidence": 0.713 + }, + { + "text": "évidemment,", + "start": 150.44, + "end": 151.1, + "confidence": 0.775 + }, + { + "text": "va", + "start": 151.1, + "end": 151.28, + "confidence": 0.381 + }, + { + "text": "là", + "start": 151.28, + "end": 151.42, + "confidence": 0.633 + }, + { + "text": "pour", + "start": 151.42, + "end": 151.62, + "confidence": 0.398 + }, + { + "text": "nos", + "start": 151.62, + "end": 151.7, + "confidence": 0.807 + }, + { + "text": "aussi.", + "start": 151.7, + "end": 152.02, + "confidence": 0.145 + } + ] + }, + { + "id": 43, + "seek": 13702, + "start": 152.66, + "end": 154.4, + "text": " Donc, rapport immédiate d'accord.", + "tokens": [ + 7477, + 11, + 18018, + 3397, + 526, + 4504, + 473, + 274, + 6, + 19947, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.851, + "words": [ + { + "text": "Donc,", + "start": 152.66, + "end": 153.0, + "confidence": 0.985 + }, + { + "text": "rapport", + "start": 153.0, + "end": 153.42, + "confidence": 0.977 + }, + { + "text": "immédiate", + "start": 153.42, + "end": 154.26, + "confidence": 0.825 + }, + { + "text": "d'accord.", + "start": 154.26, + "end": 154.4, + "confidence": 0.806 + } + ] + }, + { + "id": 44, + "seek": 13702, + "start": 154.4, + "end": 157.95, + "text": " Mais pourquoi, à ton impression qu'on en sortira 
jamais?", + "tokens": [ + 6313, + 19934, + 11, + 1531, + 2952, + 9995, + 421, + 6, + 266, + 465, + 26906, + 64, + 14540, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.791, + "words": [ + { + "text": "Mais", + "start": 154.4, + "end": 155.9, + "confidence": 0.984 + }, + { + "text": "pourquoi,", + "start": 155.9, + "end": 156.36, + "confidence": 0.959 + }, + { + "text": "à", + "start": 156.36, + "end": 156.48, + "confidence": 0.821 + }, + { + "text": "ton", + "start": 156.48, + "end": 156.66, + "confidence": 0.952 + }, + { + "text": "impression", + "start": 156.66, + "end": 156.96, + "confidence": 0.932 + }, + { + "text": "qu'on", + "start": 156.96, + "end": 157.28, + "confidence": 0.906 + }, + { + "text": "en", + "start": 157.28, + "end": 157.34, + "confidence": 0.789 + }, + { + "text": "sortira", + "start": 157.34, + "end": 157.84, + "confidence": 0.543 + }, + { + "text": "jamais?", + "start": 157.84, + "end": 157.95, + "confidence": 0.503 + } + ] + }, + { + "id": 45, + "seek": 13702, + "start": 157.95, + "end": 162.37, + "text": " Et puis, il faut en remettre la faute sur les gens qui ont créé cette", + "tokens": [ + 3790, + 9093, + 11, + 1930, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 5550 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.82, + "words": [ + { + "text": "Et", + "start": 157.95, + "end": 159.26, + "confidence": 0.838 + }, + { + "text": "puis,", + "start": 159.26, + "end": 159.58, + "confidence": 0.481 + }, + { + "text": "il", + "start": 159.58, + "end": 159.62, + "confidence": 0.665 + }, + { + "text": "faut", + "start": 159.62, + "end": 159.66, + "confidence": 0.942 + }, + { + "text": "en", + "start": 159.66, + "end": 159.72, + 
"confidence": 0.925 + }, + { + "text": "remettre", + "start": 159.72, + "end": 160.1, + "confidence": 0.985 + }, + { + "text": "la", + "start": 160.1, + "end": 160.28, + "confidence": 0.702 + }, + { + "text": "faute", + "start": 160.28, + "end": 160.62, + "confidence": 0.55 + }, + { + "text": "sur", + "start": 160.62, + "end": 160.9, + "confidence": 0.954 + }, + { + "text": "les", + "start": 160.9, + "end": 161.22, + "confidence": 0.81 + }, + { + "text": "gens", + "start": 161.22, + "end": 161.42, + "confidence": 0.985 + }, + { + "text": "qui", + "start": 161.42, + "end": 161.58, + "confidence": 0.981 + }, + { + "text": "ont", + "start": 161.58, + "end": 161.62, + "confidence": 0.955 + }, + { + "text": "créé", + "start": 161.62, + "end": 162.3, + "confidence": 0.957 + }, + { + "text": "cette", + "start": 162.3, + "end": 162.37, + "confidence": 0.9 + } + ] + }, + { + "id": 46, + "seek": 16228, + "start": 162.37, + "end": 165.3, + "text": " route merveilleux et diabolique, qui a dit à bollique par coeur, merveilleux.", + "tokens": [ + 7955, + 3551, + 303, + 3409, + 2449, + 1030, + 33227, + 401, + 1925, + 11, + 1956, + 257, + 6176, + 1531, + 748, + 285, + 1925, + 971, + 45781, + 11, + 3551, + 303, + 3409, + 2449, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7643054464588994, + "compression_ratio": 1.544041450777202, + "no_speech_prob": 3.89045562769752e-05, + "confidence": 0.397, + "words": [ + { + "text": "route", + "start": 162.37, + "end": 162.6, + "confidence": 0.076 + }, + { + "text": "merveilleux", + "start": 162.6, + "end": 163.3, + "confidence": 0.635 + }, + { + "text": "et", + "start": 163.3, + "end": 163.42, + "confidence": 0.837 + }, + { + "text": "diabolique,", + "start": 163.42, + "end": 163.82, + "confidence": 0.328 + }, + { + "text": "qui", + "start": 163.82, + "end": 163.9, + "confidence": 0.365 + }, + { + "text": "a", + "start": 163.9, + "end": 163.98, + "confidence": 0.054 + }, + { + "text": "dit", + "start": 163.98, + "end": 164.06, + 
"confidence": 0.085 + }, + { + "text": "à", + "start": 164.06, + "end": 164.1, + "confidence": 0.433 + }, + { + "text": "bollique", + "start": 164.1, + "end": 164.3, + "confidence": 0.297 + }, + { + "text": "par", + "start": 164.3, + "end": 164.52, + "confidence": 0.634 + }, + { + "text": "coeur,", + "start": 164.52, + "end": 164.78, + "confidence": 0.361 + }, + { + "text": "merveilleux.", + "start": 164.78, + "end": 165.3, + "confidence": 0.982 + } + ] + }, + { + "id": 47, + "seek": 16228, + "start": 167.36, + "end": 168.7, + "text": " Les économistes parlent de dépendance du santé.", + "tokens": [ + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 30068, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7643054464588994, + "compression_ratio": 1.544041450777202, + "no_speech_prob": 3.89045562769752e-05, + "confidence": 0.748, + "words": [ + { + "text": "Les", + "start": 167.36, + "end": 167.4, + "confidence": 0.513 + }, + { + "text": "économistes", + "start": 167.4, + "end": 167.5, + "confidence": 0.937 + }, + { + "text": "parlent", + "start": 167.5, + "end": 167.78, + "confidence": 0.806 + }, + { + "text": "de", + "start": 167.78, + "end": 167.82, + "confidence": 0.855 + }, + { + "text": "dépendance", + "start": 167.82, + "end": 168.32, + "confidence": 0.758 + }, + { + "text": "du", + "start": 168.32, + "end": 168.5, + "confidence": 0.979 + }, + { + "text": "santé.", + "start": 168.5, + "end": 168.7, + "confidence": 0.389 + } + ] + }, + { + "id": 48, + "seek": 16228, + "start": 168.84, + "end": 172.66, + "text": " Ces vidéos, en fait, on est un santé qui a été établie, c'est un soit mon termine,", + "tokens": [ + 28414, + 25417, + 11, + 465, + 3887, + 11, + 322, + 871, + 517, + 30068, + 1956, + 257, + 8862, + 4823, + 455, + 6302, + 11, + 269, + 6, + 377, + 517, + 12703, + 1108, + 1433, + 533, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.7643054464588994, + "compression_ratio": 1.544041450777202, + "no_speech_prob": 
3.89045562769752e-05, + "confidence": 0.487, + "words": [ + { + "text": "Ces", + "start": 168.84, + "end": 169.14, + "confidence": 0.531 + }, + { + "text": "vidéos,", + "start": 169.14, + "end": 169.5, + "confidence": 0.725 + }, + { + "text": "en", + "start": 169.5, + "end": 169.62, + "confidence": 0.757 + }, + { + "text": "fait,", + "start": 169.62, + "end": 169.68, + "confidence": 0.974 + }, + { + "text": "on", + "start": 169.68, + "end": 169.74, + "confidence": 0.306 + }, + { + "text": "est", + "start": 169.74, + "end": 169.86, + "confidence": 0.82 + }, + { + "text": "un", + "start": 169.86, + "end": 170.16, + "confidence": 0.245 + }, + { + "text": "santé", + "start": 170.16, + "end": 170.72, + "confidence": 0.943 + }, + { + "text": "qui", + "start": 170.72, + "end": 170.88, + "confidence": 0.839 + }, + { + "text": "a", + "start": 170.88, + "end": 170.96, + "confidence": 0.533 + }, + { + "text": "été", + "start": 170.96, + "end": 171.08, + "confidence": 0.992 + }, + { + "text": "établie,", + "start": 171.08, + "end": 171.6, + "confidence": 0.316 + }, + { + "text": "c'est", + "start": 171.6, + "end": 171.74, + "confidence": 0.55 + }, + { + "text": "un", + "start": 171.74, + "end": 171.88, + "confidence": 0.651 + }, + { + "text": "soit", + "start": 171.88, + "end": 172.12, + "confidence": 0.284 + }, + { + "text": "mon", + "start": 172.12, + "end": 172.28, + "confidence": 0.225 + }, + { + "text": "termine,", + "start": 172.28, + "end": 172.66, + "confidence": 0.288 + } + ] + }, + { + "id": 49, + "seek": 17312, + "start": 173.14, + "end": 177.42, + "text": " soit définissant des beurs, on définisse un signalétique.", + "tokens": [ + 50364, + 12703, + 40763, + 29492, + 730, + 312, + 2156, + 11, + 322, + 40763, + 7746, + 517, + 6358, + 42379, + 13, + 51436 + ], + "temperature": 0.0, + "avg_logprob": -0.9428024291992188, + "compression_ratio": 1.0169491525423728, + "no_speech_prob": 6.687085260637105e-05, + "confidence": 0.403, + "words": [ + { + "text": "soit", + 
"start": 173.14, + "end": 174.04, + "confidence": 0.127 + }, + { + "text": "définissant", + "start": 174.04, + "end": 175.5, + "confidence": 0.58 + }, + { + "text": "des", + "start": 175.5, + "end": 175.68, + "confidence": 0.813 + }, + { + "text": "beurs,", + "start": 175.68, + "end": 175.96, + "confidence": 0.378 + }, + { + "text": "on", + "start": 175.96, + "end": 176.04, + "confidence": 0.165 + }, + { + "text": "définisse", + "start": 176.04, + "end": 176.42, + "confidence": 0.536 + }, + { + "text": "un", + "start": 176.42, + "end": 176.6, + "confidence": 0.266 + }, + { + "text": "signalétique.", + "start": 176.6, + "end": 177.42, + "confidence": 0.544 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/bonjour.wav.words.json b/tests/expected/tiny_fr/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..1901436ee71322379b0220469be87670b650ff07 --- /dev/null +++ b/tests/expected/tiny_fr/bonjour.wav.words.json @@ -0,0 +1,32 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.96, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50414 + ], + "temperature": 0.0, + "avg_logprob": -0.698755931854248, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.019103480502963066, + "confidence": 0.828, + "words": [ + { + "text": "Bonjour !", + "start": 0.14, + "end": 0.96, + "confidence": 0.828 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny_fr/bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..2a2328a1c0033e8baffc7e157ddd45fe78519fb0 --- /dev/null +++ b/tests/expected/tiny_fr/bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,162 @@ +{ + "text": " Bonjour ! Est-ce que vous allez bien ? Bonjour ! 
Esque vous allez bien !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.44, + "end": 1.44, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50438 + ], + "temperature": 0.0, + "avg_logprob": -0.7718849182128906, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.04268376901745796, + "confidence": 0.69, + "words": [ + { + "text": "Bonjour !", + "start": 0.44, + "end": 1.44, + "confidence": 0.69 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.88, + "end": 3.12, + "text": " Est-ce que vous allez bien ?", + "tokens": [ + 50438, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50538 + ], + "temperature": 0.0, + "avg_logprob": -0.7718849182128906, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.04268376901745796, + "confidence": 0.68, + "words": [ + { + "text": "Est-ce", + "start": 1.88, + "end": 2.2, + "confidence": 0.668 + }, + { + "text": "que", + "start": 2.2, + "end": 2.24, + "confidence": 0.875 + }, + { + "text": "vous", + "start": 2.24, + "end": 2.36, + "confidence": 0.993 + }, + { + "text": "allez", + "start": 2.36, + "end": 2.56, + "confidence": 0.268 + }, + { + "text": "bien ?", + "start": 2.56, + "end": 3.12, + "confidence": 0.973 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 32.98, + "end": 33.48, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50514 + ], + "temperature": 0.0, + "avg_logprob": -0.9282397490281326, + "compression_ratio": 0.8048780487804879, + "no_speech_prob": 0.35444578528404236, + "confidence": 0.532, + "words": [ + { + "text": "Bonjour !", + "start": 32.98, + "end": 33.48, + "confidence": 0.532 + } + ] + }, + { + "id": 3, + "seek": 3000, + "start": 34.42, + "end": 35.48, + "text": " Esque vous allez bien !", + "tokens": [ + 50514, + 2313, + 1077, + 2630, + 18146, + 3610, + 2298, + 50614 + ], + "temperature": 0.0, + "avg_logprob": -0.9282397490281326, + "compression_ratio": 0.8048780487804879, + "no_speech_prob": 0.35444578528404236, + 
"confidence": 0.483, + "words": [ + { + "text": "Esque", + "start": 34.42, + "end": 34.76, + "confidence": 0.185 + }, + { + "text": "vous", + "start": 34.76, + "end": 34.9, + "confidence": 0.982 + }, + { + "text": "allez", + "start": 34.9, + "end": 35.08, + "confidence": 0.78 + }, + { + "text": "bien !", + "start": 35.08, + "end": 35.48, + "confidence": 0.998 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/empty.mp3.words.json b/tests/expected/tiny_fr/empty.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..cd4c337f3962718881cc93300e57e4f7d05feedb --- /dev/null +++ b/tests/expected/tiny_fr/empty.mp3.words.json @@ -0,0 +1,5 @@ +{ + "text": "", + "segments": [], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/gaenswein15.mp3.words.json b/tests/expected/tiny_fr/gaenswein15.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..61e7eecb07b714d80941528af67c9b1bd52adf69 --- /dev/null +++ b/tests/expected/tiny_fr/gaenswein15.mp3.words.json @@ -0,0 +1,71 @@ +{ + "text": " Leur de l'Ontario est de la", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 20.14, + "end": 24.68, + "text": " Leur de l'Ontario est de la", + "tokens": [ + 50364, + 1456, + 374, + 368, + 287, + 6, + 46, + 580, + 4912, + 871, + 368, + 635, + 51571 + ], + "temperature": 0.0, + "avg_logprob": -1.7973311288016183, + "compression_ratio": 0.84375, + "no_speech_prob": 0.04554257169365883, + "confidence": 0.152, + "words": [ + { + "text": "Leur", + "start": 20.14, + "end": 20.16, + "confidence": 0.055 + }, + { + "text": "de", + "start": 20.16, + "end": 20.18, + "confidence": 0.11 + }, + { + "text": "l'Ontario", + "start": 20.18, + "end": 24.62, + "confidence": 0.309 + }, + { + "text": "est", + "start": 24.62, + "end": 24.64, + "confidence": 0.128 + }, + { + "text": "de", + "start": 24.64, + "end": 24.66, + "confidence": 0.063 + }, + { + "text": "la", 
+ "start": 24.66, + "end": 24.68, + "confidence": 0.133 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/gloria.mp3.words.json b/tests/expected/tiny_fr/gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..7696df1cf6e63badd0242c8125d2f4ccdc4fc029 --- /dev/null +++ b/tests/expected/tiny_fr/gloria.mp3.words.json @@ -0,0 +1,1584 @@ +{ + "text": " Je suis très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.34, + "end": 29.98, + "text": " Je suis très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très 
très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très très", + "tokens": [ + 50364, + 2588, + 7624, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 
5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732, + 5732 + ], + "temperature": 0.0, + "avg_logprob": -0.3068582153320312, + "compression_ratio": 43.0, + "no_speech_prob": 0.11139103770256042, + "confidence": 0.735, + "words": [ + { + "text": "Je", + "start": 1.34, + "end": 5.76, + "confidence": 0.043 + }, + { + "text": "suis", + "start": 5.76, + "end": 5.92, + "confidence": 0.155 + }, + { + "text": "très", + "start": 5.92, + "end": 6.2, + "confidence": 0.052 + }, + { + "text": "très", + "start": 6.2, + "end": 10.56, + "confidence": 0.107 + }, + { + "text": "très", + "start": 10.56, + "end": 11.22, + "confidence": 0.16 + }, + { + "text": "très", + "start": 11.22, + "end": 11.34, + "confidence": 0.186 + }, + { + "text": "très", + "start": 11.34, + "end": 11.36, + "confidence": 0.2 + }, + { + "text": "très", + "start": 11.36, + "end": 11.38, + "confidence": 0.229 + }, + { + "text": "très", + "start": 11.38, + "end": 11.4, + "confidence": 0.257 + }, + { + "text": "très", + "start": 11.4, + "end": 11.42, + "confidence": 0.283 + }, + { + "text": "très", + "start": 11.42, + "end": 11.44, + "confidence": 0.322 + }, + { + "text": "très", + "start": 11.44, + "end": 11.46, + "confidence": 0.382 + }, + { + "text": "très", + "start": 11.46, + "end": 11.48, + "confidence": 0.458 + }, + { + "text": "très", + "start": 11.48, + "end": 11.5, + "confidence": 0.537 + }, + { + "text": "très", + "start": 11.5, + "end": 11.52, + "confidence": 0.592 + }, + { + "text": "très", + "start": 
11.52, + "end": 11.54, + "confidence": 0.634 + }, + { + "text": "très", + "start": 11.54, + "end": 11.56, + "confidence": 0.67 + }, + { + "text": "très", + "start": 11.56, + "end": 11.58, + "confidence": 0.689 + }, + { + "text": "très", + "start": 11.58, + "end": 11.6, + "confidence": 0.702 + }, + { + "text": "très", + "start": 11.6, + "end": 11.62, + "confidence": 0.719 + }, + { + "text": "très", + "start": 11.62, + "end": 11.64, + "confidence": 0.722 + }, + { + "text": "très", + "start": 11.64, + "end": 11.66, + "confidence": 0.734 + }, + { + "text": "très", + "start": 11.66, + "end": 11.68, + "confidence": 0.74 + }, + { + "text": "très", + "start": 11.68, + "end": 11.7, + "confidence": 0.749 + }, + { + "text": "très", + "start": 11.7, + "end": 11.72, + "confidence": 0.753 + }, + { + "text": "très", + "start": 11.72, + "end": 11.74, + "confidence": 0.75 + }, + { + "text": "très", + "start": 11.74, + "end": 11.76, + "confidence": 0.753 + }, + { + "text": "très", + "start": 11.76, + "end": 11.78, + "confidence": 0.756 + }, + { + "text": "très", + "start": 11.78, + "end": 11.8, + "confidence": 0.76 + }, + { + "text": "très", + "start": 11.8, + "end": 11.82, + "confidence": 0.76 + }, + { + "text": "très", + "start": 11.82, + "end": 11.84, + "confidence": 0.76 + }, + { + "text": "très", + "start": 11.84, + "end": 11.86, + "confidence": 0.766 + }, + { + "text": "très", + "start": 11.86, + "end": 11.88, + "confidence": 0.769 + }, + { + "text": "très", + "start": 11.88, + "end": 11.9, + "confidence": 0.768 + }, + { + "text": "très", + "start": 11.9, + "end": 11.92, + "confidence": 0.768 + }, + { + "text": "très", + "start": 11.92, + "end": 11.94, + "confidence": 0.769 + }, + { + "text": "très", + "start": 11.94, + "end": 11.96, + "confidence": 0.773 + }, + { + "text": "très", + "start": 11.96, + "end": 11.98, + "confidence": 0.775 + }, + { + "text": "très", + "start": 11.98, + "end": 12.0, + "confidence": 0.776 + }, + { + "text": "très", + "start": 12.0, + "end": 12.02, 
+ "confidence": 0.776 + }, + { + "text": "très", + "start": 12.02, + "end": 12.8, + "confidence": 0.78 + }, + { + "text": "très", + "start": 12.8, + "end": 13.64, + "confidence": 0.783 + }, + { + "text": "très", + "start": 13.64, + "end": 13.7, + "confidence": 0.785 + }, + { + "text": "très", + "start": 13.7, + "end": 14.74, + "confidence": 0.782 + }, + { + "text": "très", + "start": 14.74, + "end": 14.76, + "confidence": 0.78 + }, + { + "text": "très", + "start": 14.76, + "end": 14.96, + "confidence": 0.782 + }, + { + "text": "très", + "start": 14.96, + "end": 14.98, + "confidence": 0.783 + }, + { + "text": "très", + "start": 14.98, + "end": 15.32, + "confidence": 0.786 + }, + { + "text": "très", + "start": 15.32, + "end": 15.94, + "confidence": 0.786 + }, + { + "text": "très", + "start": 15.94, + "end": 15.96, + "confidence": 0.787 + }, + { + "text": "très", + "start": 15.96, + "end": 15.98, + "confidence": 0.791 + }, + { + "text": "très", + "start": 15.98, + "end": 16.0, + "confidence": 0.791 + }, + { + "text": "très", + "start": 16.0, + "end": 16.02, + "confidence": 0.787 + }, + { + "text": "très", + "start": 16.02, + "end": 16.04, + "confidence": 0.788 + }, + { + "text": "très", + "start": 16.04, + "end": 16.06, + "confidence": 0.786 + }, + { + "text": "très", + "start": 16.06, + "end": 17.54, + "confidence": 0.794 + }, + { + "text": "très", + "start": 17.54, + "end": 17.56, + "confidence": 0.791 + }, + { + "text": "très", + "start": 17.56, + "end": 17.58, + "confidence": 0.794 + }, + { + "text": "très", + "start": 17.58, + "end": 17.6, + "confidence": 0.793 + }, + { + "text": "très", + "start": 17.6, + "end": 17.62, + "confidence": 0.797 + }, + { + "text": "très", + "start": 17.62, + "end": 17.64, + "confidence": 0.796 + }, + { + "text": "très", + "start": 17.64, + "end": 17.66, + "confidence": 0.797 + }, + { + "text": "très", + "start": 17.66, + "end": 17.68, + "confidence": 0.796 + }, + { + "text": "très", + "start": 17.68, + "end": 17.7, + "confidence": 
0.798 + }, + { + "text": "très", + "start": 17.7, + "end": 17.72, + "confidence": 0.801 + }, + { + "text": "très", + "start": 17.72, + "end": 17.74, + "confidence": 0.804 + }, + { + "text": "très", + "start": 17.74, + "end": 17.76, + "confidence": 0.804 + }, + { + "text": "très", + "start": 17.76, + "end": 17.78, + "confidence": 0.802 + }, + { + "text": "très", + "start": 17.78, + "end": 17.8, + "confidence": 0.805 + }, + { + "text": "très", + "start": 17.8, + "end": 17.82, + "confidence": 0.806 + }, + { + "text": "très", + "start": 17.82, + "end": 17.84, + "confidence": 0.808 + }, + { + "text": "très", + "start": 17.84, + "end": 17.86, + "confidence": 0.812 + }, + { + "text": "très", + "start": 17.86, + "end": 17.88, + "confidence": 0.815 + }, + { + "text": "très", + "start": 17.88, + "end": 17.9, + "confidence": 0.818 + }, + { + "text": "très", + "start": 17.9, + "end": 17.92, + "confidence": 0.819 + }, + { + "text": "très", + "start": 17.92, + "end": 17.94, + "confidence": 0.821 + }, + { + "text": "très", + "start": 17.94, + "end": 17.96, + "confidence": 0.824 + }, + { + "text": "très", + "start": 17.96, + "end": 17.98, + "confidence": 0.824 + }, + { + "text": "très", + "start": 17.98, + "end": 19.34, + "confidence": 0.826 + }, + { + "text": "très", + "start": 19.34, + "end": 19.36, + "confidence": 0.828 + }, + { + "text": "très", + "start": 19.36, + "end": 19.38, + "confidence": 0.835 + }, + { + "text": "très", + "start": 19.38, + "end": 19.4, + "confidence": 0.836 + }, + { + "text": "très", + "start": 19.4, + "end": 19.42, + "confidence": 0.839 + }, + { + "text": "très", + "start": 19.42, + "end": 19.44, + "confidence": 0.839 + }, + { + "text": "très", + "start": 19.44, + "end": 19.46, + "confidence": 0.841 + }, + { + "text": "très", + "start": 19.46, + "end": 19.48, + "confidence": 0.841 + }, + { + "text": "très", + "start": 19.48, + "end": 19.5, + "confidence": 0.843 + }, + { + "text": "très", + "start": 19.5, + "end": 19.52, + "confidence": 0.845 + }, + { + 
"text": "très", + "start": 19.52, + "end": 19.54, + "confidence": 0.849 + }, + { + "text": "très", + "start": 19.54, + "end": 19.56, + "confidence": 0.848 + }, + { + "text": "très", + "start": 19.56, + "end": 19.58, + "confidence": 0.848 + }, + { + "text": "très", + "start": 19.58, + "end": 19.6, + "confidence": 0.849 + }, + { + "text": "très", + "start": 19.6, + "end": 19.62, + "confidence": 0.852 + }, + { + "text": "très", + "start": 19.62, + "end": 19.64, + "confidence": 0.851 + }, + { + "text": "très", + "start": 19.64, + "end": 20.06, + "confidence": 0.853 + }, + { + "text": "très", + "start": 20.06, + "end": 20.56, + "confidence": 0.854 + }, + { + "text": "très", + "start": 20.56, + "end": 20.58, + "confidence": 0.854 + }, + { + "text": "très", + "start": 20.58, + "end": 20.6, + "confidence": 0.856 + }, + { + "text": "très", + "start": 20.6, + "end": 20.62, + "confidence": 0.857 + }, + { + "text": "très", + "start": 20.62, + "end": 20.64, + "confidence": 0.857 + }, + { + "text": "très", + "start": 20.64, + "end": 20.66, + "confidence": 0.86 + }, + { + "text": "très", + "start": 20.66, + "end": 20.68, + "confidence": 0.857 + }, + { + "text": "très", + "start": 20.68, + "end": 20.7, + "confidence": 0.86 + }, + { + "text": "très", + "start": 20.7, + "end": 20.72, + "confidence": 0.862 + }, + { + "text": "très", + "start": 20.72, + "end": 20.74, + "confidence": 0.865 + }, + { + "text": "très", + "start": 20.74, + "end": 20.76, + "confidence": 0.866 + }, + { + "text": "très", + "start": 20.76, + "end": 20.78, + "confidence": 0.866 + }, + { + "text": "très", + "start": 20.78, + "end": 20.8, + "confidence": 0.867 + }, + { + "text": "très", + "start": 20.8, + "end": 20.82, + "confidence": 0.863 + }, + { + "text": "très", + "start": 20.82, + "end": 20.84, + "confidence": 0.863 + }, + { + "text": "très", + "start": 20.84, + "end": 20.86, + "confidence": 0.864 + }, + { + "text": "très", + "start": 20.86, + "end": 20.88, + "confidence": 0.863 + }, + { + "text": "très", + 
"start": 20.88, + "end": 20.9, + "confidence": 0.863 + }, + { + "text": "très", + "start": 20.9, + "end": 20.92, + "confidence": 0.861 + }, + { + "text": "très", + "start": 20.92, + "end": 20.94, + "confidence": 0.863 + }, + { + "text": "très", + "start": 20.94, + "end": 20.96, + "confidence": 0.863 + }, + { + "text": "très", + "start": 20.96, + "end": 20.98, + "confidence": 0.858 + }, + { + "text": "très", + "start": 20.98, + "end": 21.0, + "confidence": 0.856 + }, + { + "text": "très", + "start": 21.0, + "end": 21.02, + "confidence": 0.858 + }, + { + "text": "très", + "start": 21.02, + "end": 21.04, + "confidence": 0.857 + }, + { + "text": "très", + "start": 21.04, + "end": 21.06, + "confidence": 0.852 + }, + { + "text": "très", + "start": 21.06, + "end": 21.08, + "confidence": 0.851 + }, + { + "text": "très", + "start": 21.08, + "end": 21.1, + "confidence": 0.852 + }, + { + "text": "très", + "start": 21.1, + "end": 21.12, + "confidence": 0.852 + }, + { + "text": "très", + "start": 21.12, + "end": 21.14, + "confidence": 0.853 + }, + { + "text": "très", + "start": 21.14, + "end": 21.16, + "confidence": 0.848 + }, + { + "text": "très", + "start": 21.16, + "end": 21.18, + "confidence": 0.849 + }, + { + "text": "très", + "start": 21.18, + "end": 21.2, + "confidence": 0.85 + }, + { + "text": "très", + "start": 21.2, + "end": 21.22, + "confidence": 0.844 + }, + { + "text": "très", + "start": 21.22, + "end": 21.24, + "confidence": 0.849 + }, + { + "text": "très", + "start": 21.24, + "end": 21.26, + "confidence": 0.85 + }, + { + "text": "très", + "start": 21.26, + "end": 21.28, + "confidence": 0.851 + }, + { + "text": "très", + "start": 21.28, + "end": 21.3, + "confidence": 0.85 + }, + { + "text": "très", + "start": 21.3, + "end": 21.32, + "confidence": 0.847 + }, + { + "text": "très", + "start": 21.32, + "end": 21.34, + "confidence": 0.846 + }, + { + "text": "très", + "start": 21.34, + "end": 21.36, + "confidence": 0.843 + }, + { + "text": "très", + "start": 21.36, + 
"end": 21.38, + "confidence": 0.844 + }, + { + "text": "très", + "start": 21.38, + "end": 21.4, + "confidence": 0.846 + }, + { + "text": "très", + "start": 21.4, + "end": 21.42, + "confidence": 0.838 + }, + { + "text": "très", + "start": 21.42, + "end": 21.44, + "confidence": 0.842 + }, + { + "text": "très", + "start": 21.44, + "end": 21.46, + "confidence": 0.84 + }, + { + "text": "très", + "start": 21.46, + "end": 21.48, + "confidence": 0.844 + }, + { + "text": "très", + "start": 21.48, + "end": 21.5, + "confidence": 0.84 + }, + { + "text": "très", + "start": 21.5, + "end": 21.52, + "confidence": 0.834 + }, + { + "text": "très", + "start": 21.52, + "end": 21.54, + "confidence": 0.826 + }, + { + "text": "très", + "start": 21.54, + "end": 21.56, + "confidence": 0.824 + }, + { + "text": "très", + "start": 21.56, + "end": 21.58, + "confidence": 0.822 + }, + { + "text": "très", + "start": 21.58, + "end": 21.6, + "confidence": 0.822 + }, + { + "text": "très", + "start": 21.6, + "end": 21.62, + "confidence": 0.823 + }, + { + "text": "très", + "start": 21.62, + "end": 21.64, + "confidence": 0.82 + }, + { + "text": "très", + "start": 21.64, + "end": 21.66, + "confidence": 0.826 + }, + { + "text": "très", + "start": 21.66, + "end": 21.68, + "confidence": 0.82 + }, + { + "text": "très", + "start": 21.68, + "end": 21.7, + "confidence": 0.817 + }, + { + "text": "très", + "start": 21.7, + "end": 21.72, + "confidence": 0.815 + }, + { + "text": "très", + "start": 21.72, + "end": 21.74, + "confidence": 0.816 + }, + { + "text": "très", + "start": 21.74, + "end": 21.76, + "confidence": 0.818 + }, + { + "text": "très", + "start": 21.76, + "end": 21.78, + "confidence": 0.812 + }, + { + "text": "très", + "start": 21.78, + "end": 21.8, + "confidence": 0.82 + }, + { + "text": "très", + "start": 21.8, + "end": 21.82, + "confidence": 0.811 + }, + { + "text": "très", + "start": 21.82, + "end": 21.84, + "confidence": 0.814 + }, + { + "text": "très", + "start": 21.84, + "end": 21.86, + 
"confidence": 0.82 + }, + { + "text": "très", + "start": 21.86, + "end": 21.88, + "confidence": 0.811 + }, + { + "text": "très", + "start": 21.88, + "end": 21.9, + "confidence": 0.816 + }, + { + "text": "très", + "start": 21.9, + "end": 21.92, + "confidence": 0.813 + }, + { + "text": "très", + "start": 21.92, + "end": 21.94, + "confidence": 0.811 + }, + { + "text": "très", + "start": 21.94, + "end": 21.96, + "confidence": 0.809 + }, + { + "text": "très", + "start": 21.96, + "end": 21.98, + "confidence": 0.802 + }, + { + "text": "très", + "start": 21.98, + "end": 22.0, + "confidence": 0.802 + }, + { + "text": "très", + "start": 22.0, + "end": 22.02, + "confidence": 0.799 + }, + { + "text": "très", + "start": 22.02, + "end": 22.04, + "confidence": 0.793 + }, + { + "text": "très", + "start": 22.04, + "end": 22.06, + "confidence": 0.801 + }, + { + "text": "très", + "start": 22.06, + "end": 22.08, + "confidence": 0.784 + }, + { + "text": "très", + "start": 22.08, + "end": 22.1, + "confidence": 0.78 + }, + { + "text": "très", + "start": 22.1, + "end": 22.12, + "confidence": 0.784 + }, + { + "text": "très", + "start": 22.12, + "end": 22.14, + "confidence": 0.783 + }, + { + "text": "très", + "start": 22.14, + "end": 22.16, + "confidence": 0.777 + }, + { + "text": "très", + "start": 22.16, + "end": 22.18, + "confidence": 0.778 + }, + { + "text": "très", + "start": 22.18, + "end": 22.2, + "confidence": 0.788 + }, + { + "text": "très", + "start": 22.2, + "end": 22.22, + "confidence": 0.789 + }, + { + "text": "très", + "start": 22.22, + "end": 22.24, + "confidence": 0.788 + }, + { + "text": "très", + "start": 22.24, + "end": 22.26, + "confidence": 0.789 + }, + { + "text": "très", + "start": 22.26, + "end": 22.28, + "confidence": 0.786 + }, + { + "text": "très", + "start": 22.28, + "end": 22.3, + "confidence": 0.778 + }, + { + "text": "très", + "start": 22.3, + "end": 22.32, + "confidence": 0.784 + }, + { + "text": "très", + "start": 22.32, + "end": 22.34, + "confidence": 0.793 
+ }, + { + "text": "très", + "start": 22.34, + "end": 22.36, + "confidence": 0.792 + }, + { + "text": "très", + "start": 22.36, + "end": 22.38, + "confidence": 0.781 + }, + { + "text": "très", + "start": 22.38, + "end": 22.4, + "confidence": 0.788 + }, + { + "text": "très", + "start": 22.4, + "end": 22.42, + "confidence": 0.786 + }, + { + "text": "très", + "start": 22.42, + "end": 22.44, + "confidence": 0.783 + }, + { + "text": "très", + "start": 22.44, + "end": 22.46, + "confidence": 0.781 + }, + { + "text": "très", + "start": 22.46, + "end": 22.48, + "confidence": 0.786 + }, + { + "text": "très", + "start": 22.48, + "end": 22.5, + "confidence": 0.775 + }, + { + "text": "très", + "start": 22.5, + "end": 22.52, + "confidence": 0.778 + }, + { + "text": "très", + "start": 22.52, + "end": 22.54, + "confidence": 0.777 + }, + { + "text": "très", + "start": 22.54, + "end": 22.56, + "confidence": 0.769 + }, + { + "text": "très", + "start": 22.56, + "end": 22.58, + "confidence": 0.769 + }, + { + "text": "très", + "start": 22.58, + "end": 22.6, + "confidence": 0.768 + }, + { + "text": "très", + "start": 22.6, + "end": 22.62, + "confidence": 0.774 + }, + { + "text": "très", + "start": 22.62, + "end": 22.64, + "confidence": 0.763 + }, + { + "text": "très", + "start": 22.64, + "end": 22.66, + "confidence": 0.769 + }, + { + "text": "très", + "start": 22.66, + "end": 22.68, + "confidence": 0.761 + }, + { + "text": "très", + "start": 22.68, + "end": 22.7, + "confidence": 0.771 + }, + { + "text": "très", + "start": 22.7, + "end": 22.72, + "confidence": 0.765 + }, + { + "text": "très", + "start": 22.72, + "end": 22.74, + "confidence": 0.762 + }, + { + "text": "très", + "start": 22.74, + "end": 22.76, + "confidence": 0.773 + }, + { + "text": "très", + "start": 22.76, + "end": 22.78, + "confidence": 0.773 + }, + { + "text": "très", + "start": 22.78, + "end": 22.8, + "confidence": 0.767 + }, + { + "text": "très", + "start": 22.8, + "end": 22.82, + "confidence": 0.763 + }, + { + 
"text": "très", + "start": 22.82, + "end": 22.84, + "confidence": 0.756 + }, + { + "text": "très", + "start": 22.84, + "end": 22.86, + "confidence": 0.766 + }, + { + "text": "très", + "start": 22.86, + "end": 22.88, + "confidence": 0.766 + }, + { + "text": "très", + "start": 22.88, + "end": 22.9, + "confidence": 0.758 + }, + { + "text": "très", + "start": 22.9, + "end": 22.92, + "confidence": 0.767 + }, + { + "text": "très", + "start": 22.92, + "end": 23.04, + "confidence": 0.763 + }, + { + "text": "très", + "start": 23.04, + "end": 23.06, + "confidence": 0.765 + }, + { + "text": "très", + "start": 23.06, + "end": 23.08, + "confidence": 0.777 + }, + { + "text": "très", + "start": 23.08, + "end": 23.1, + "confidence": 0.785 + }, + { + "text": "très", + "start": 23.1, + "end": 23.5, + "confidence": 0.782 + }, + { + "text": "très", + "start": 23.5, + "end": 23.52, + "confidence": 0.793 + }, + { + "text": "très", + "start": 23.52, + "end": 23.54, + "confidence": 0.822 + }, + { + "text": "très", + "start": 23.54, + "end": 26.68, + "confidence": 0.791 + }, + { + "text": "très", + "start": 26.68, + "end": 29.98, + "confidence": 0.776 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/laugh1.mp3.words.json b/tests/expected/tiny_fr/laugh1.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..4d9f1b89f34cd35864f9f356a114254966be939d --- /dev/null +++ b/tests/expected/tiny_fr/laugh1.mp3.words.json @@ -0,0 +1,81 @@ +{ + "text": " Je vais vous donner un peu de temps.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.18, + "end": 1.72, + "text": " Je vais vous donner un peu de temps.", + "tokens": [ + 50364, + 2588, + 9369, + 2630, + 20882, + 517, + 5604, + 368, + 8827, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -1.8613629341125488, + "compression_ratio": 0.8181818181818182, + "no_speech_prob": 0.4177960157394409, + "confidence": 0.103, + "words": [ + { + "text": "Je", + 
"start": 0.18, + "end": 0.82, + "confidence": 0.067 + }, + { + "text": "vais", + "start": 0.82, + "end": 1.04, + "confidence": 0.128 + }, + { + "text": "vous", + "start": 1.04, + "end": 1.06, + "confidence": 0.074 + }, + { + "text": "donner", + "start": 1.06, + "end": 1.26, + "confidence": 0.048 + }, + { + "text": "un", + "start": 1.26, + "end": 1.66, + "confidence": 0.143 + }, + { + "text": "peu", + "start": 1.66, + "end": 1.68, + "confidence": 0.091 + }, + { + "text": "de", + "start": 1.68, + "end": 1.7, + "confidence": 0.625 + }, + { + "text": "temps.", + "start": 1.7, + "end": 1.72, + "confidence": 0.053 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/laugh2.mp3.words.json b/tests/expected/tiny_fr/laugh2.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..ecb84adb58577b4525cbc43e4f78967115e2495d --- /dev/null +++ b/tests/expected/tiny_fr/laugh2.mp3.words.json @@ -0,0 +1,31 @@ +{ + "text": " ...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.18, + "end": 0.66, + "text": " ...", + "tokens": [ + 50364, + 1097, + 50414 + ], + "temperature": 0.0, + "avg_logprob": -1.2509205341339111, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.5727680921554565, + "confidence": 0.088, + "words": [ + { + "text": "...", + "start": 0.18, + "end": 0.66, + "confidence": 0.088 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/punctuations.mp3.words.json b/tests/expected/tiny_fr/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..3b8093ece16187cc188cfb74ddd482e3e5b06e0e --- /dev/null +++ b/tests/expected/tiny_fr/punctuations.mp3.words.json @@ -0,0 +1,71 @@ +{ + "text": " Dima, est ce que l'on vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.58, + "text": " Dima, est ce que l'on vole ?", + "tokens": [ + 50364, + 413, + 4775, + 11, + 871, + 1769, 
+ 631, + 287, + 6, + 266, + 49877, + 2506, + 50494 + ], + "temperature": 0.0, + "avg_logprob": -0.84262786592756, + "compression_ratio": 0.7777777777777778, + "no_speech_prob": 0.0010857833549380302, + "confidence": 0.456, + "words": [ + { + "text": "Dima,", + "start": 0.42, + "end": 0.84, + "confidence": 0.247 + }, + { + "text": "est", + "start": 1.2, + "end": 1.4, + "confidence": 0.93 + }, + { + "text": "ce", + "start": 1.4, + "end": 1.56, + "confidence": 0.587 + }, + { + "text": "que", + "start": 1.56, + "end": 1.68, + "confidence": 0.917 + }, + { + "text": "l'on", + "start": 1.68, + "end": 2.02, + "confidence": 0.482 + }, + { + "text": "vole ?", + "start": 2.02, + "end": 2.58, + "confidence": 0.249 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/radio_short.mp3.words.json b/tests/expected/tiny_fr/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b7006f9b2e7859513b6b1c89436472eaeeecb772 --- /dev/null +++ b/tests/expected/tiny_fr/radio_short.mp3.words.json @@ -0,0 +1,106 @@ +{ + "text": " ... ... ... 
...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.44, + "end": 1.46, + "text": " ...", + "tokens": [ + 50364, + 1097, + 50614 + ], + "temperature": 0.0, + "avg_logprob": -1.420121431350708, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.5095687508583069, + "confidence": 0.224, + "words": [ + { + "text": "...", + "start": 0.44, + "end": 1.46, + "confidence": 0.224 + } + ] + }, + { + "id": 1, + "seek": 6000, + "start": 60.0, + "end": 69.42, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.965411365032196, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.878373384475708, + "confidence": 0.464, + "words": [ + { + "text": "...", + "start": 60.0, + "end": 69.42, + "confidence": 0.464 + } + ] + }, + { + "id": 2, + "seek": 9000, + "start": 105.48, + "end": 105.5, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.822394847869873, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.6408223509788513, + "confidence": 0.841, + "words": [ + { + "text": "...", + "start": 105.48, + "end": 105.5, + "confidence": 0.841 + } + ] + }, + { + "id": 3, + "seek": 10500, + "start": 106.84, + "end": 108.62, + "text": " ...", + "tokens": [ + 50414, + 1097, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.9275697469711304, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.8346357345581055, + "confidence": 0.469, + "words": [ + { + "text": "...", + "start": 106.84, + "end": 108.62, + "confidence": 0.469 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/smartphone.mp3.words.json b/tests/expected/tiny_fr/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..c4c22c2b044b299c716373c8421ba2b1f620cc0c --- /dev/null +++ b/tests/expected/tiny_fr/smartphone.mp3.words.json @@ -0,0 +1,5091 @@ +{ + "text": " C'est évidence que 
dit Nicolas, mais je me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière dans quelques interagues entraîne. Et il est d'ailleurs, c'est la photo c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces les grand-attachilles à été beaucoup très souvent mentionnées. Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs nous ont appris à piquer sur des icônes, sauf que, alors le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible. Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté tout flu de la navigation web pour aller directement en but. Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas, dit qu'il est très symbolablement inédit dans l'histoire de l'humanité. Mais ça s'assoulait d'une autre interrogation. Est-ce que le fait que cette objet soit inédit un d'huits que notre rapport a lui est aussi un rapport inédit ? Est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone ? Il n'y a pas d'équivalent. On s'est espèrent de nous voter dans la relation à l'objet. C'est facilement éterricion. Parce que la passion de l'utilisateur et ses affices a dépendance, cette objet d'un lieu en fait, une espèce de relation de médiation avec le monde qui rendent encore avec la maille de celles formes de rogeur. Donc, à objets inédits, rapport inédits. Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépendance et de rogeur. Bon, en vrai, il faudrait remonter très, très filmant tout l'histoire des objectes techniques et de leur infertion dans nos vieux pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment. Pour autant, je sache. 
Il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais, la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même mort. On peut adorer sa bagnure. On a par besoin pour plein de choses. Et là, le soir, quand on va se coucher, on la laisse. On la pade en la main quand on est colis, qu'on n'a même pas au chiot. On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui. Continuellement avec son smartphone dans la main, comme c'était une sorte de estimateur extère de tomber de lâcher à l'éantrénée, ça m'a eu immédiate. Bon, je dis ça pour le mome, mais évidemment, va là, bon aussi. Donc, rapport immédiate d'accord. Mais pourquoi, à ton impression qu'on en sortira, j'amé? Et puis, il faut en remettre la faute sur les gens qui ont créé cette critique merveilleux et diabolique et diabolique par que merveilleux. Les économistes parlent de dépendance du santé. 
Ces vidéos, en fait, on est un santé qui a été étabis, un soit mon termine, en marchand dessus, soit des finissants débordes, des finissants, une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 3.66, + "text": " C'est évidence que dit Nicolas, mais je me l'étais jamais formulé comme ça.", + "tokens": [ + 50364, + 383, + 6, + 377, + 20090, + 2778, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13, + 50545 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.709, + "words": [ + { + "text": "C'est", + "start": 0.42, + "end": 0.68, + "confidence": 0.85 + }, + { + "text": "évidence", + "start": 0.68, + "end": 0.94, + "confidence": 0.368 + }, + { + "text": "que", + "start": 0.94, + "end": 1.08, + "confidence": 0.882 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.345 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.44, + "confidence": 0.921 + }, + { + "text": "mais", + "start": 1.88, + "end": 2.14, + "confidence": 0.914 + }, + { + "text": "je", + "start": 2.14, + "end": 2.26, + "confidence": 0.778 + }, + { + "text": "me", + "start": 2.26, + "end": 2.34, + "confidence": 0.954 + }, + { + "text": "l'étais", + "start": 2.34, + "end": 2.58, + "confidence": 0.719 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.86, + "confidence": 0.946 + }, + { + "text": "formulé", + "start": 2.86, + "end": 3.26, + "confidence": 0.529 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.46, + "confidence": 0.968 + }, + { + "text": "ça.", + "start": 3.46, + "end": 3.66, + "confidence": 0.96 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.14, + "end": 8.9, + "text": " Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière", + "tokens": [ + 50545, + 8257, + 1956, + 3887, + 635, + 
3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 635, + 12713, + 2776, + 730, + 17290, + 3916, + 11, + 2420, + 635, + 22267, + 50806 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.789, + "words": [ + { + "text": "Ce", + "start": 4.14, + "end": 4.26, + "confidence": 0.394 + }, + { + "text": "qui", + "start": 4.26, + "end": 4.38, + "confidence": 0.939 + }, + { + "text": "fait", + "start": 4.38, + "end": 4.56, + "confidence": 0.731 + }, + { + "text": "la", + "start": 4.56, + "end": 4.72, + "confidence": 0.988 + }, + { + "text": "force", + "start": 4.72, + "end": 5.02, + "confidence": 0.93 + }, + { + "text": "du", + "start": 5.02, + "end": 5.2, + "confidence": 0.937 + }, + { + "text": "smartphone,", + "start": 5.2, + "end": 5.58, + "confidence": 0.908 + }, + { + "text": "c'est", + "start": 5.9, + "end": 6.2, + "confidence": 0.948 + }, + { + "text": "pas", + "start": 6.2, + "end": 6.26, + "confidence": 0.983 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.6, + "confidence": 0.993 + }, + { + "text": "la", + "start": 6.6, + "end": 6.8, + "confidence": 0.636 + }, + { + "text": "cumulation", + "start": 6.8, + "end": 7.34, + "confidence": 0.691 + }, + { + "text": "des", + "start": 7.34, + "end": 7.56, + "confidence": 0.793 + }, + { + "text": "fonctions,", + "start": 7.56, + "end": 8.14, + "confidence": 0.832 + }, + { + "text": "mais", + "start": 8.38, + "end": 8.5, + "confidence": 0.669 + }, + { + "text": "la", + "start": 8.5, + "end": 8.62, + "confidence": 0.718 + }, + { + "text": "manière", + "start": 8.62, + "end": 8.9, + "confidence": 0.498 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.9, + "end": 10.98, + "text": " dans quelques interagues entraîne.", + "tokens": [ + 50806, + 2680, + 16597, + 728, + 559, + 1247, + 22284, + 24741, + 13, + 50906 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + 
"compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.303, + "words": [ + { + "text": "dans", + "start": 8.9, + "end": 9.06, + "confidence": 0.283 + }, + { + "text": "quelques", + "start": 9.06, + "end": 9.28, + "confidence": 0.281 + }, + { + "text": "interagues", + "start": 9.28, + "end": 10.38, + "confidence": 0.245 + }, + { + "text": "entraîne.", + "start": 10.38, + "end": 10.98, + "confidence": 0.446 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 11.0, + "end": 12.96, + "text": " Et il est d'ailleurs, c'est la photo c'est hyper convaincant.", + "tokens": [ + 50906, + 3790, + 1930, + 871, + 274, + 6, + 19400, + 11, + 269, + 6, + 377, + 635, + 5052, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13, + 51006 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.63, + "words": [ + { + "text": "Et", + "start": 11.0, + "end": 11.12, + "confidence": 0.362 + }, + { + "text": "il", + "start": 11.12, + "end": 11.28, + "confidence": 0.144 + }, + { + "text": "est", + "start": 11.28, + "end": 11.38, + "confidence": 0.24 + }, + { + "text": "d'ailleurs,", + "start": 11.38, + "end": 11.6, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 11.7, + "end": 11.78, + "confidence": 0.886 + }, + { + "text": "la", + "start": 11.78, + "end": 11.8, + "confidence": 0.969 + }, + { + "text": "photo", + "start": 11.8, + "end": 12.02, + "confidence": 0.809 + }, + { + "text": "c'est", + "start": 12.02, + "end": 12.26, + "confidence": 0.784 + }, + { + "text": "hyper", + "start": 12.26, + "end": 12.46, + "confidence": 0.938 + }, + { + "text": "convaincant.", + "start": 12.46, + "end": 12.96, + "confidence": 0.509 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.3, + "end": 18.8, + "text": " Alors évidemment, il faudrait ajouter les interfaces les grand-attachilles à été beaucoup très souvent", + "tokens": [ 
+ 51006, + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 1512, + 2697, + 12, + 1591, + 608, + 14835, + 1531, + 8862, + 8796, + 5732, + 20847, + 51306 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.533, + "words": [ + { + "text": "Alors", + "start": 13.3, + "end": 13.56, + "confidence": 0.894 + }, + { + "text": "évidemment,", + "start": 13.56, + "end": 13.82, + "confidence": 0.777 + }, + { + "text": "il", + "start": 14.38, + "end": 14.4, + "confidence": 0.964 + }, + { + "text": "faudrait", + "start": 14.4, + "end": 14.76, + "confidence": 0.855 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.38, + "confidence": 0.883 + }, + { + "text": "les", + "start": 15.38, + "end": 15.6, + "confidence": 0.935 + }, + { + "text": "interfaces", + "start": 15.6, + "end": 16.0, + "confidence": 0.381 + }, + { + "text": "les", + "start": 16.0, + "end": 16.5, + "confidence": 0.465 + }, + { + "text": "grand-attachilles", + "start": 16.5, + "end": 17.16, + "confidence": 0.214 + }, + { + "text": "à", + "start": 17.16, + "end": 17.32, + "confidence": 0.408 + }, + { + "text": "été", + "start": 17.32, + "end": 17.72, + "confidence": 0.894 + }, + { + "text": "beaucoup", + "start": 17.72, + "end": 18.28, + "confidence": 0.798 + }, + { + "text": "très", + "start": 18.28, + "end": 18.64, + "confidence": 0.493 + }, + { + "text": "souvent", + "start": 18.64, + "end": 18.8, + "confidence": 0.823 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 18.8, + "end": 19.84, + "text": " mentionnées.", + "tokens": [ + 51306, + 2152, + 77, + 6836, + 13, + 51356 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.626, + "words": [ + { + "text": "mentionnées.", + "start": 18.8, + "end": 19.84, + "confidence": 0.626 + } + ] + }, + { 
+ "id": 6, + "seek": 0, + "start": 20.02, + "end": 23.58, + "text": " Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs", + "tokens": [ + 51356, + 6313, + 4428, + 11, + 1930, + 38694, + 8645, + 631, + 1512, + 1740, + 3324, + 6212, + 368, + 945, + 1567, + 17338, + 287, + 6, + 21210, + 11, + 1512, + 4792, + 13923, + 2156, + 51543 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.658, + "words": [ + { + "text": "Mais", + "start": 20.02, + "end": 20.26, + "confidence": 0.979 + }, + { + "text": "bon,", + "start": 20.26, + "end": 20.46, + "confidence": 0.566 + }, + { + "text": "il", + "start": 20.6, + "end": 20.68, + "confidence": 0.975 + }, + { + "text": "faudrait", + "start": 20.68, + "end": 20.78, + "confidence": 0.773 + }, + { + "text": "que", + "start": 20.78, + "end": 20.92, + "confidence": 0.386 + }, + { + "text": "les", + "start": 20.92, + "end": 20.96, + "confidence": 0.221 + }, + { + "text": "profites", + "start": 20.96, + "end": 21.36, + "confidence": 0.568 + }, + { + "text": "aussi", + "start": 21.36, + "end": 21.72, + "confidence": 0.521 + }, + { + "text": "de", + "start": 21.72, + "end": 21.92, + "confidence": 0.489 + }, + { + "text": "20", + "start": 21.92, + "end": 22.14, + "confidence": 0.915 + }, + { + "text": "ans", + "start": 22.14, + "end": 22.32, + "confidence": 0.942 + }, + { + "text": "pendant", + "start": 22.32, + "end": 22.52, + "confidence": 0.915 + }, + { + "text": "l'été,", + "start": 22.52, + "end": 22.86, + "confidence": 0.482 + }, + { + "text": "les", + "start": 22.98, + "end": 23.1, + "confidence": 0.903 + }, + { + "text": "ordinateurs", + "start": 23.1, + "end": 23.58, + "confidence": 0.946 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 23.58, + "end": 28.07, + "text": " nous ont appris à piquer sur des icônes, sauf que, alors le smartphone ajoute le toucher,", + "tokens": [ + 
51543, + 4666, + 6592, + 724, + 5714, + 1531, + 280, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 11, + 601, + 2947, + 631, + 11, + 11246, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 51766 + ], + "temperature": 0.0, + "avg_logprob": -0.5387768369732481, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14262977242469788, + "confidence": 0.656, + "words": [ + { + "text": "nous", + "start": 23.58, + "end": 23.78, + "confidence": 0.766 + }, + { + "text": "ont", + "start": 23.78, + "end": 23.9, + "confidence": 0.981 + }, + { + "text": "appris", + "start": 23.9, + "end": 24.12, + "confidence": 0.952 + }, + { + "text": "à", + "start": 24.12, + "end": 24.26, + "confidence": 0.341 + }, + { + "text": "piquer", + "start": 24.26, + "end": 24.54, + "confidence": 0.446 + }, + { + "text": "sur", + "start": 24.54, + "end": 24.72, + "confidence": 0.816 + }, + { + "text": "des", + "start": 24.72, + "end": 24.9, + "confidence": 0.96 + }, + { + "text": "icônes,", + "start": 24.9, + "end": 25.56, + "confidence": 0.599 + }, + { + "text": "sauf", + "start": 25.64, + "end": 25.8, + "confidence": 0.521 + }, + { + "text": "que,", + "start": 25.8, + "end": 26.36, + "confidence": 0.915 + }, + { + "text": "alors", + "start": 26.36, + "end": 26.58, + "confidence": 0.399 + }, + { + "text": "le", + "start": 26.58, + "end": 26.72, + "confidence": 0.824 + }, + { + "text": "smartphone", + "start": 26.72, + "end": 27.0, + "confidence": 0.977 + }, + { + "text": "ajoute", + "start": 27.0, + "end": 27.5, + "confidence": 0.673 + }, + { + "text": "le", + "start": 27.5, + "end": 27.62, + "confidence": 0.554 + }, + { + "text": "toucher,", + "start": 27.62, + "end": 28.07, + "confidence": 0.631 + } + ] + }, + { + "id": 8, + "seek": 2804, + "start": 28.07, + "end": 30.6, + "text": " qui rend le contact plus direct, plus sensible.", + "tokens": [ + 50364, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13, + 50496 + ], + "temperature": 0.0, + 
"avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.741, + "words": [ + { + "text": "qui", + "start": 28.07, + "end": 28.26, + "confidence": 0.304 + }, + { + "text": "rend", + "start": 28.26, + "end": 28.5, + "confidence": 0.762 + }, + { + "text": "le", + "start": 28.5, + "end": 28.72, + "confidence": 0.988 + }, + { + "text": "contact", + "start": 28.72, + "end": 29.06, + "confidence": 0.817 + }, + { + "text": "plus", + "start": 29.06, + "end": 29.48, + "confidence": 0.841 + }, + { + "text": "direct,", + "start": 29.48, + "end": 30.02, + "confidence": 0.945 + }, + { + "text": "plus", + "start": 30.18, + "end": 30.24, + "confidence": 0.992 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.6, + "confidence": 0.618 + } + ] + }, + { + "id": 9, + "seek": 2804, + "start": 31.1, + "end": 34.76, + "text": " Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté", + "tokens": [ + 50496, + 3790, + 9093, + 11, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 50698 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.775, + "words": [ + { + "text": "Et", + "start": 31.1, + "end": 31.24, + "confidence": 0.949 + }, + { + "text": "puis,", + "start": 31.24, + "end": 31.36, + "confidence": 0.725 + }, + { + "text": "évidemment,", + "start": 31.42, + "end": 31.62, + "confidence": 0.211 + }, + { + "text": "il", + "start": 31.7, + "end": 31.76, + "confidence": 0.959 + }, + { + "text": "faudrait", + "start": 31.76, + "end": 31.94, + "confidence": 0.994 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.14, + "confidence": 0.847 + }, + { + "text": "aussi", + "start": 32.14, + "end": 32.36, + "confidence": 0.924 + }, + { + "text": "des", + 
"start": 32.36, + "end": 32.46, + "confidence": 0.921 + }, + { + "text": "applications", + "start": 32.46, + "end": 32.88, + "confidence": 0.856 + }, + { + "text": "qui", + "start": 32.88, + "end": 33.2, + "confidence": 0.66 + }, + { + "text": "permettent", + "start": 33.2, + "end": 33.8, + "confidence": 0.952 + }, + { + "text": "de", + "start": 33.8, + "end": 33.96, + "confidence": 0.951 + }, + { + "text": "contourner", + "start": 33.96, + "end": 34.4, + "confidence": 0.72 + }, + { + "text": "le", + "start": 34.4, + "end": 34.52, + "confidence": 0.645 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.76, + "confidence": 0.63 + } + ] + }, + { + "id": 10, + "seek": 2804, + "start": 34.8, + "end": 37.86, + "text": " tout flu de la navigation web pour aller directement en but.", + "tokens": [ + 50698, + 3486, + 5029, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 465, + 457, + 13, + 50860 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.735, + "words": [ + { + "text": "tout", + "start": 34.8, + "end": 35.04, + "confidence": 0.946 + }, + { + "text": "flu", + "start": 35.04, + "end": 35.32, + "confidence": 0.488 + }, + { + "text": "de", + "start": 35.32, + "end": 35.64, + "confidence": 0.345 + }, + { + "text": "la", + "start": 35.64, + "end": 35.78, + "confidence": 0.922 + }, + { + "text": "navigation", + "start": 35.78, + "end": 36.24, + "confidence": 0.903 + }, + { + "text": "web", + "start": 36.24, + "end": 36.64, + "confidence": 0.912 + }, + { + "text": "pour", + "start": 36.64, + "end": 36.84, + "confidence": 0.571 + }, + { + "text": "aller", + "start": 36.84, + "end": 37.06, + "confidence": 0.991 + }, + { + "text": "directement", + "start": 37.06, + "end": 37.48, + "confidence": 0.981 + }, + { + "text": "en", + "start": 37.48, + "end": 37.7, + "confidence": 0.656 + }, + { + "text": "but.", + "start": 37.7, + "end": 37.86, + 
"confidence": 0.768 + } + ] + }, + { + "id": 11, + "seek": 2804, + "start": 38.78, + "end": 43.13, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas,", + "tokens": [ + 50860, + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 5550, + 14964, + 11, + 465, + 38268, + 11, + 51121 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.738, + "words": [ + { + "text": "Bref,", + "start": 38.78, + "end": 38.8, + "confidence": 0.972 + }, + { + "text": "tout", + "start": 38.84, + "end": 39.04, + "confidence": 0.816 + }, + { + "text": "ça,", + "start": 39.04, + "end": 39.46, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 39.54, + "end": 39.76, + "confidence": 0.905 + }, + { + "text": "sont", + "start": 39.76, + "end": 39.96, + "confidence": 0.981 + }, + { + "text": "les", + "start": 39.96, + "end": 40.1, + "confidence": 0.982 + }, + { + "text": "conditions", + "start": 40.1, + "end": 40.64, + "confidence": 0.957 + }, + { + "text": "qui", + "start": 40.64, + "end": 40.96, + "confidence": 0.995 + }, + { + "text": "permettent", + "start": 40.96, + "end": 41.58, + "confidence": 0.993 + }, + { + "text": "de", + "start": 41.58, + "end": 41.64, + "confidence": 0.993 + }, + { + "text": "créer", + "start": 41.64, + "end": 42.08, + "confidence": 0.909 + }, + { + "text": "cette", + "start": 42.08, + "end": 42.34, + "confidence": 0.477 + }, + { + "text": "objet,", + "start": 42.34, + "end": 42.64, + "confidence": 0.237 + }, + { + "text": "en", + "start": 42.7, + "end": 42.84, + "confidence": 0.22 + }, + { + "text": "Nicolas,", + "start": 42.84, + "end": 43.13, + "confidence": 0.539 + } + ] + }, + { + "id": 12, + "seek": 2804, + "start": 43.13, + "end": 46.58, + "text": " dit qu'il est très symbolablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 
51121, + 6176, + 421, + 6, + 388, + 871, + 5732, + 5986, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13, + 51290 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.717, + "words": [ + { + "text": "dit", + "start": 43.13, + "end": 43.56, + "confidence": 0.692 + }, + { + "text": "qu'il", + "start": 43.56, + "end": 43.76, + "confidence": 0.957 + }, + { + "text": "est", + "start": 43.76, + "end": 43.9, + "confidence": 0.848 + }, + { + "text": "très", + "start": 43.9, + "end": 44.08, + "confidence": 0.445 + }, + { + "text": "symbolablement", + "start": 44.08, + "end": 44.86, + "confidence": 0.321 + }, + { + "text": "inédit", + "start": 44.86, + "end": 45.54, + "confidence": 0.655 + }, + { + "text": "dans", + "start": 45.54, + "end": 45.74, + "confidence": 0.817 + }, + { + "text": "l'histoire", + "start": 45.74, + "end": 46.02, + "confidence": 0.825 + }, + { + "text": "de", + "start": 46.02, + "end": 46.14, + "confidence": 0.984 + }, + { + "text": "l'humanité.", + "start": 46.14, + "end": 46.58, + "confidence": 0.99 + } + ] + }, + { + "id": 13, + "seek": 2804, + "start": 47.06, + "end": 48.76, + "text": " Mais ça s'assoulait d'une autre interrogation.", + "tokens": [ + 51290, + 6313, + 2788, + 262, + 6, + 640, + 263, + 35235, + 274, + 6, + 2613, + 15081, + 24871, + 399, + 13, + 51402 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.718, + "words": [ + { + "text": "Mais", + "start": 47.06, + "end": 47.24, + "confidence": 0.928 + }, + { + "text": "ça", + "start": 47.24, + "end": 47.48, + "confidence": 0.75 + }, + { + "text": "s'assoulait", + "start": 47.48, + "end": 47.84, + "confidence": 0.616 + }, + { + "text": "d'une", + "start": 47.84, + "end": 48.08, + "confidence": 0.675 + }, + { + 
"text": "autre", + "start": 48.08, + "end": 48.18, + "confidence": 0.964 + }, + { + "text": "interrogation.", + "start": 48.18, + "end": 48.76, + "confidence": 0.855 + } + ] + }, + { + "id": 14, + "seek": 2804, + "start": 49.42, + "end": 54.93, + "text": " Est-ce que le fait que cette objet soit inédit un d'huits que notre rapport a lui est aussi un rapport", + "tokens": [ + 51402, + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 5550, + 14964, + 12703, + 294, + 7811, + 270, + 517, + 274, + 6, + 12086, + 1208, + 631, + 10349, + 18018, + 257, + 8783, + 871, + 6212, + 517, + 18018, + 51710 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.704, + "words": [ + { + "text": "Est-ce", + "start": 49.42, + "end": 49.7, + "confidence": 0.919 + }, + { + "text": "que", + "start": 49.7, + "end": 49.74, + "confidence": 0.989 + }, + { + "text": "le", + "start": 49.74, + "end": 49.82, + "confidence": 0.872 + }, + { + "text": "fait", + "start": 49.82, + "end": 50.02, + "confidence": 0.946 + }, + { + "text": "que", + "start": 50.02, + "end": 50.16, + "confidence": 0.922 + }, + { + "text": "cette", + "start": 50.16, + "end": 50.3, + "confidence": 0.944 + }, + { + "text": "objet", + "start": 50.3, + "end": 50.64, + "confidence": 0.963 + }, + { + "text": "soit", + "start": 50.64, + "end": 51.1, + "confidence": 0.99 + }, + { + "text": "inédit", + "start": 51.1, + "end": 51.82, + "confidence": 0.928 + }, + { + "text": "un", + "start": 51.82, + "end": 52.08, + "confidence": 0.514 + }, + { + "text": "d'huits", + "start": 52.08, + "end": 52.34, + "confidence": 0.246 + }, + { + "text": "que", + "start": 52.34, + "end": 52.44, + "confidence": 0.974 + }, + { + "text": "notre", + "start": 52.44, + "end": 52.66, + "confidence": 0.992 + }, + { + "text": "rapport", + "start": 52.66, + "end": 53.24, + "confidence": 0.778 + }, + { + "text": "a", + "start": 53.24, + "end": 53.5, + 
"confidence": 0.572 + }, + { + "text": "lui", + "start": 53.5, + "end": 53.7, + "confidence": 0.698 + }, + { + "text": "est", + "start": 53.7, + "end": 54.04, + "confidence": 0.88 + }, + { + "text": "aussi", + "start": 54.04, + "end": 54.52, + "confidence": 0.814 + }, + { + "text": "un", + "start": 54.52, + "end": 54.7, + "confidence": 0.48 + }, + { + "text": "rapport", + "start": 54.7, + "end": 54.93, + "confidence": 0.951 + } + ] + }, + { + "id": 15, + "seek": 2804, + "start": 54.93, + "end": 55.88, + "text": " inédit ?", + "tokens": [ + 51710, + 294, + 7811, + 270, + 2506, + 51760 + ], + "temperature": 0.0, + "avg_logprob": -0.4260108065442974, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.1274678260087967, + "confidence": 0.86, + "words": [ + { + "text": "inédit ?", + "start": 54.93, + "end": 55.88, + "confidence": 0.86 + } + ] + }, + { + "id": 16, + "seek": 5596, + "start": 55.96, + "end": 59.36, + "text": " Est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait", + "tokens": [ + 50364, + 4410, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 6195, + 368, + 6901, + 297, + 6, + 377, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 50530 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.62, + "words": [ + { + "text": "Est-ce", + "start": 55.96, + "end": 56.38, + "confidence": 0.55 + }, + { + "text": "que", + "start": 56.38, + "end": 56.44, + "confidence": 0.892 + }, + { + "text": "le", + "start": 56.44, + "end": 56.58, + "confidence": 0.982 + }, + { + "text": "rapport", + "start": 56.58, + "end": 56.9, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 56.9, + "end": 57.14, + "confidence": 0.904 + }, + { + "text": "a", + "start": 57.14, + "end": 57.2, + "confidence": 0.959 + }, + { + "text": "au", + "start": 57.2, + "end": 57.28, + 
"confidence": 0.272 + }, + { + "text": "sein", + "start": 57.28, + "end": 57.42, + "confidence": 0.17 + }, + { + "text": "de", + "start": 57.42, + "end": 57.56, + "confidence": 0.23 + }, + { + "text": "foi", + "start": 57.56, + "end": 57.58, + "confidence": 0.121 + }, + { + "text": "n'est", + "start": 57.58, + "end": 57.78, + "confidence": 0.79 + }, + { + "text": "comparable", + "start": 57.78, + "end": 58.28, + "confidence": 0.547 + }, + { + "text": "à", + "start": 58.28, + "end": 58.48, + "confidence": 0.934 + }, + { + "text": "celui", + "start": 58.48, + "end": 58.7, + "confidence": 0.851 + }, + { + "text": "qu'on", + "start": 58.7, + "end": 58.94, + "confidence": 0.926 + }, + { + "text": "entretenait", + "start": 58.94, + "end": 59.36, + "confidence": 0.56 + } + ] + }, + { + "id": 17, + "seek": 5596, + "start": 59.36, + "end": 63.14, + "text": " à d'autres objectes techniques comme la voiture ou le téléphone ?", + "tokens": [ + 50530, + 1531, + 274, + 6, + 16752, + 2657, + 279, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.757, + "words": [ + { + "text": "à", + "start": 59.36, + "end": 59.44, + "confidence": 0.951 + }, + { + "text": "d'autres", + "start": 59.44, + "end": 59.68, + "confidence": 0.857 + }, + { + "text": "objectes", + "start": 59.68, + "end": 60.02, + "confidence": 0.507 + }, + { + "text": "techniques", + "start": 60.02, + "end": 60.44, + "confidence": 0.463 + }, + { + "text": "comme", + "start": 60.44, + "end": 61.04, + "confidence": 0.639 + }, + { + "text": "la", + "start": 61.04, + "end": 61.52, + "confidence": 0.907 + }, + { + "text": "voiture", + "start": 61.52, + "end": 61.86, + "confidence": 0.947 + }, + { + "text": "ou", + "start": 61.86, + "end": 62.46, + "confidence": 0.758 + }, + { + "text": "le", + "start": 62.46, + "end": 62.68, + "confidence": 
0.912 + }, + { + "text": "téléphone ?", + "start": 62.68, + "end": 63.14, + "confidence": 0.983 + } + ] + }, + { + "id": 18, + "seek": 5596, + "start": 65.4, + "end": 66.16, + "text": " Il n'y a pas d'équivalent.", + "tokens": [ + 50714, + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 13, + 50872 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.881, + "words": [ + { + "text": "Il", + "start": 65.4, + "end": 65.5, + "confidence": 0.901 + }, + { + "text": "n'y", + "start": 65.5, + "end": 65.54, + "confidence": 0.859 + }, + { + "text": "a", + "start": 65.54, + "end": 65.56, + "confidence": 0.962 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.68, + "confidence": 0.998 + }, + { + "text": "d'équivalent.", + "start": 65.68, + "end": 66.16, + "confidence": 0.853 + } + ] + }, + { + "id": 19, + "seek": 5596, + "start": 66.16, + "end": 69.92, + "text": " On s'est espèrent de nous voter dans la relation à l'objet.", + "tokens": [ + 50872, + 1282, + 262, + 6, + 377, + 7089, + 1462, + 1753, + 368, + 4666, + 21722, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 13, + 51058 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.501, + "words": [ + { + "text": "On", + "start": 66.16, + "end": 67.08, + "confidence": 0.301 + }, + { + "text": "s'est", + "start": 67.08, + "end": 67.3, + "confidence": 0.561 + }, + { + "text": "espèrent", + "start": 67.3, + "end": 67.62, + "confidence": 0.134 + }, + { + "text": "de", + "start": 67.62, + "end": 67.66, + "confidence": 0.99 + }, + { + "text": "nous", + "start": 67.66, + "end": 67.84, + "confidence": 0.763 + }, + { + "text": "voter", + "start": 67.84, + "end": 68.36, + "confidence": 0.183 + }, + { + "text": "dans", + "start": 68.36, + "end": 68.86, + "confidence": 
0.82 + }, + { + "text": "la", + "start": 68.86, + "end": 68.96, + "confidence": 0.614 + }, + { + "text": "relation", + "start": 68.96, + "end": 69.24, + "confidence": 0.949 + }, + { + "text": "à", + "start": 69.24, + "end": 69.42, + "confidence": 0.75 + }, + { + "text": "l'objet.", + "start": 69.42, + "end": 69.92, + "confidence": 0.887 + } + ] + }, + { + "id": 20, + "seek": 5596, + "start": 70.2, + "end": 71.22, + "text": " C'est facilement éterricion.", + "tokens": [ + 51058, + 383, + 6, + 377, + 23670, + 518, + 1136, + 391, + 1341, + 313, + 13, + 51122 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.443, + "words": [ + { + "text": "C'est", + "start": 70.2, + "end": 70.34, + "confidence": 0.896 + }, + { + "text": "facilement", + "start": 70.34, + "end": 70.7, + "confidence": 0.566 + }, + { + "text": "éterricion.", + "start": 70.7, + "end": 71.22, + "confidence": 0.231 + } + ] + }, + { + "id": 21, + "seek": 5596, + "start": 71.64, + "end": 76.97, + "text": " Parce que la passion de l'utilisateur et ses affices a dépendance, cette objet d'un lieu", + "tokens": [ + 51122, + 20429, + 631, + 635, + 5418, + 368, + 287, + 6, + 20835, + 271, + 15540, + 1030, + 5385, + 2096, + 1473, + 257, + 45768, + 719, + 11, + 5550, + 14964, + 274, + 6, + 409, + 26036, + 51416 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.457, + "words": [ + { + "text": "Parce", + "start": 71.64, + "end": 71.94, + "confidence": 0.68 + }, + { + "text": "que", + "start": 71.94, + "end": 72.32, + "confidence": 0.566 + }, + { + "text": "la", + "start": 72.32, + "end": 72.56, + "confidence": 0.333 + }, + { + "text": "passion", + "start": 72.56, + "end": 72.9, + "confidence": 0.266 + }, + { + "text": "de", + "start": 72.9, + "end": 73.26, + "confidence": 0.365 + }, + { + 
"text": "l'utilisateur", + "start": 73.26, + "end": 74.82, + "confidence": 0.5 + }, + { + "text": "et", + "start": 74.82, + "end": 74.92, + "confidence": 0.452 + }, + { + "text": "ses", + "start": 74.92, + "end": 75.04, + "confidence": 0.095 + }, + { + "text": "affices", + "start": 75.04, + "end": 75.24, + "confidence": 0.302 + }, + { + "text": "a", + "start": 75.24, + "end": 75.38, + "confidence": 0.589 + }, + { + "text": "dépendance,", + "start": 75.38, + "end": 76.06, + "confidence": 0.561 + }, + { + "text": "cette", + "start": 76.32, + "end": 76.34, + "confidence": 0.399 + }, + { + "text": "objet", + "start": 76.34, + "end": 76.54, + "confidence": 0.924 + }, + { + "text": "d'un", + "start": 76.54, + "end": 76.92, + "confidence": 0.781 + }, + { + "text": "lieu", + "start": 76.92, + "end": 76.97, + "confidence": 0.305 + } + ] + }, + { + "id": 22, + "seek": 5596, + "start": 76.97, + "end": 82.15, + "text": " en fait, une espèce de relation de médiation avec le monde qui rendent encore avec", + "tokens": [ + 51416, + 465, + 3887, + 11, + 2251, + 7089, + 30236, + 368, + 9721, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 317, + 10122, + 4163, + 51671 + ], + "temperature": 0.0, + "avg_logprob": -0.6289185115269252, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948665380477905, + "confidence": 0.746, + "words": [ + { + "text": "en", + "start": 76.97, + "end": 77.2, + "confidence": 0.77 + }, + { + "text": "fait,", + "start": 77.2, + "end": 77.32, + "confidence": 0.944 + }, + { + "text": "une", + "start": 77.46, + "end": 77.48, + "confidence": 0.699 + }, + { + "text": "espèce", + "start": 77.48, + "end": 77.9, + "confidence": 0.97 + }, + { + "text": "de", + "start": 77.9, + "end": 78.08, + "confidence": 0.993 + }, + { + "text": "relation", + "start": 78.08, + "end": 78.5, + "confidence": 0.827 + }, + { + "text": "de", + "start": 78.5, + "end": 78.94, + "confidence": 0.714 + }, + { + "text": "médiation", + "start": 78.94, + "end": 
79.5, + "confidence": 0.88 + }, + { + "text": "avec", + "start": 79.5, + "end": 79.74, + "confidence": 0.968 + }, + { + "text": "le", + "start": 79.74, + "end": 79.94, + "confidence": 0.986 + }, + { + "text": "monde", + "start": 79.94, + "end": 80.44, + "confidence": 0.906 + }, + { + "text": "qui", + "start": 80.44, + "end": 81.14, + "confidence": 0.823 + }, + { + "text": "rendent", + "start": 81.14, + "end": 81.8, + "confidence": 0.586 + }, + { + "text": "encore", + "start": 81.8, + "end": 81.98, + "confidence": 0.198 + }, + { + "text": "avec", + "start": 81.98, + "end": 82.15, + "confidence": 0.491 + } + ] + }, + { + "id": 23, + "seek": 8210, + "start": 82.15, + "end": 83.44, + "text": " la maille de celles formes de rogeur.", + "tokens": [ + 50364, + 635, + 463, + 3409, + 368, + 2815, + 279, + 1254, + 279, + 368, + 744, + 432, + 374, + 13, + 50440 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.465, + "words": [ + { + "text": "la", + "start": 82.15, + "end": 82.22, + "confidence": 0.647 + }, + { + "text": "maille", + "start": 82.22, + "end": 82.4, + "confidence": 0.136 + }, + { + "text": "de", + "start": 82.4, + "end": 82.48, + "confidence": 0.588 + }, + { + "text": "celles", + "start": 82.48, + "end": 82.74, + "confidence": 0.561 + }, + { + "text": "formes", + "start": 82.74, + "end": 82.96, + "confidence": 0.778 + }, + { + "text": "de", + "start": 82.96, + "end": 83.0, + "confidence": 0.936 + }, + { + "text": "rogeur.", + "start": 83.0, + "end": 83.44, + "confidence": 0.433 + } + ] + }, + { + "id": 24, + "seek": 8210, + "start": 83.98, + "end": 87.86, + "text": " Donc, à objets inédits, rapport inédits.", + "tokens": [ + 50440, + 7477, + 11, + 1531, + 1111, + 25349, + 294, + 7811, + 1208, + 11, + 18018, + 294, + 7811, + 1208, + 13, + 50640 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 
1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.744, + "words": [ + { + "text": "Donc,", + "start": 83.98, + "end": 84.46, + "confidence": 0.831 + }, + { + "text": "à", + "start": 84.56, + "end": 84.96, + "confidence": 0.699 + }, + { + "text": "objets", + "start": 84.96, + "end": 85.44, + "confidence": 0.525 + }, + { + "text": "inédits,", + "start": 85.44, + "end": 86.24, + "confidence": 0.7 + }, + { + "text": "rapport", + "start": 86.32, + "end": 86.92, + "confidence": 0.945 + }, + { + "text": "inédits.", + "start": 86.92, + "end": 87.86, + "confidence": 0.908 + } + ] + }, + { + "id": 25, + "seek": 8210, + "start": 88.1, + "end": 94.2, + "text": " Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépendance", + "tokens": [ + 50640, + 3790, + 11, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 46750, + 38268, + 11, + 431, + 4212, + 1032, + 578, + 4198, + 50027, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 50972 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.669, + "words": [ + { + "text": "Et,", + "start": 88.1, + "end": 88.3, + "confidence": 0.865 + }, + { + "text": "ce", + "start": 88.58, + "end": 88.84, + "confidence": 0.278 + }, + { + "text": "rapport,", + "start": 88.84, + "end": 89.28, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.38, + "end": 89.56, + "confidence": 0.933 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.74, + "confidence": 0.781 + }, + { + "text": "prends", + "start": 89.74, + "end": 89.92, + "confidence": 0.304 + }, + { + "text": "Nicolas,", + "start": 89.92, + "end": 90.22, + "confidence": 0.652 + }, + { + "text": "frère", + "start": 90.78, + "end": 91.1, + "confidence": 0.432 + }, + { + "text": "caractérisée", + "start": 91.1, + "end": 91.8, + "confidence": 0.595 + }, + { + "text": "par", + "start": 91.8, + "end": 92.14, + 
"confidence": 0.907 + }, + { + "text": "un", + "start": 92.14, + "end": 92.34, + "confidence": 0.989 + }, + { + "text": "mélange", + "start": 92.34, + "end": 92.98, + "confidence": 0.921 + }, + { + "text": "de", + "start": 92.98, + "end": 93.24, + "confidence": 0.841 + }, + { + "text": "dépendance", + "start": 93.24, + "end": 94.2, + "confidence": 0.674 + } + ] + }, + { + "id": 26, + "seek": 8210, + "start": 94.36, + "end": 95.08, + "text": " et de rogeur.", + "tokens": [ + 50972, + 1030, + 368, + 744, + 432, + 374, + 13, + 51022 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.85, + "words": [ + { + "text": "et", + "start": 94.36, + "end": 94.52, + "confidence": 0.97 + }, + { + "text": "de", + "start": 94.52, + "end": 94.64, + "confidence": 0.996 + }, + { + "text": "rogeur.", + "start": 94.64, + "end": 95.08, + "confidence": 0.771 + } + ] + }, + { + "id": 27, + "seek": 8210, + "start": 96.3, + "end": 100.48, + "text": " Bon, en vrai, il faudrait remonter très, très filmant tout l'histoire des objectes", + "tokens": [ + 51022, + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 11, + 5732, + 2007, + 394, + 3486, + 287, + 6, + 29093, + 730, + 2657, + 279, + 51286 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.752, + "words": [ + { + "text": "Bon,", + "start": 96.3, + "end": 96.32, + "confidence": 0.601 + }, + { + "text": "en", + "start": 96.36, + "end": 96.58, + "confidence": 0.919 + }, + { + "text": "vrai,", + "start": 96.58, + "end": 96.92, + "confidence": 0.993 + }, + { + "text": "il", + "start": 97.14, + "end": 97.18, + "confidence": 0.987 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.56, + "confidence": 0.933 + }, + { + "text": "remonter", + "start": 97.56, + "end": 98.08, + 
"confidence": 0.61 + }, + { + "text": "très,", + "start": 98.08, + "end": 98.58, + "confidence": 0.975 + }, + { + "text": "très", + "start": 98.8, + "end": 98.86, + "confidence": 0.986 + }, + { + "text": "filmant", + "start": 98.86, + "end": 99.42, + "confidence": 0.409 + }, + { + "text": "tout", + "start": 99.42, + "end": 99.68, + "confidence": 0.346 + }, + { + "text": "l'histoire", + "start": 99.68, + "end": 100.08, + "confidence": 0.876 + }, + { + "text": "des", + "start": 100.08, + "end": 100.22, + "confidence": 0.957 + }, + { + "text": "objectes", + "start": 100.22, + "end": 100.48, + "confidence": 0.844 + } + ] + }, + { + "id": 28, + "seek": 8210, + "start": 100.48, + "end": 105.24, + "text": " techniques et de leur infertion dans nos vieux pour déterminer si ce rapport est totalement", + "tokens": [ + 51286, + 7512, + 1030, + 368, + 9580, + 1536, + 911, + 313, + 2680, + 3269, + 4941, + 2449, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 51530 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.614, + "words": [ + { + "text": "techniques", + "start": 100.48, + "end": 101.0, + "confidence": 0.951 + }, + { + "text": "et", + "start": 101.0, + "end": 101.58, + "confidence": 0.944 + }, + { + "text": "de", + "start": 101.58, + "end": 101.72, + "confidence": 0.975 + }, + { + "text": "leur", + "start": 101.72, + "end": 101.8, + "confidence": 0.806 + }, + { + "text": "infertion", + "start": 101.8, + "end": 102.34, + "confidence": 0.383 + }, + { + "text": "dans", + "start": 102.34, + "end": 102.5, + "confidence": 0.298 + }, + { + "text": "nos", + "start": 102.5, + "end": 102.68, + "confidence": 0.422 + }, + { + "text": "vieux", + "start": 102.68, + "end": 103.08, + "confidence": 0.388 + }, + { + "text": "pour", + "start": 103.08, + "end": 103.1, + "confidence": 0.275 + }, + { + "text": "déterminer", + "start": 103.1, + "end": 
103.64, + "confidence": 0.976 + }, + { + "text": "si", + "start": 103.64, + "end": 103.8, + "confidence": 0.367 + }, + { + "text": "ce", + "start": 103.8, + "end": 103.92, + "confidence": 0.983 + }, + { + "text": "rapport", + "start": 103.92, + "end": 104.22, + "confidence": 0.998 + }, + { + "text": "est", + "start": 104.22, + "end": 104.88, + "confidence": 0.942 + }, + { + "text": "totalement", + "start": 104.88, + "end": 105.24, + "confidence": 0.908 + } + ] + }, + { + "id": 29, + "seek": 8210, + "start": 105.24, + "end": 106.1, + "text": " inédit.", + "tokens": [ + 51530, + 294, + 7811, + 270, + 13, + 51580 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.939, + "words": [ + { + "text": "inédit.", + "start": 105.24, + "end": 106.1, + "confidence": 0.939 + } + ] + }, + { + "id": 30, + "seek": 8210, + "start": 106.16, + "end": 109.44, + "text": " Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment.", + "tokens": [ + 51580, + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13, + 51738 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.811, + "words": [ + { + "text": "Mais", + "start": 106.16, + "end": 106.5, + "confidence": 0.723 + }, + { + "text": "j'ai", + "start": 106.5, + "end": 106.92, + "confidence": 0.91 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.34, + "confidence": 0.956 + }, + { + "text": "comme", + "start": 107.34, + "end": 107.58, + "confidence": 0.7 + }, + { + "text": "ça", + "start": 107.58, + "end": 107.74, + "confidence": 0.953 + }, + { + "text": "que", + "start": 107.74, + "end": 108.06, + "confidence": 0.941 + }, + { + "text": "Nicolas", + "start": 108.06, + "end": 108.46, + "confidence": 0.994 + }, + { + 
"text": "se", + "start": 108.46, + "end": 108.68, + "confidence": 0.714 + }, + { + "text": "trompe", + "start": 108.68, + "end": 109.02, + "confidence": 0.54 + }, + { + "text": "pas", + "start": 109.02, + "end": 109.16, + "confidence": 0.887 + }, + { + "text": "vraiment.", + "start": 109.16, + "end": 109.44, + "confidence": 0.964 + } + ] + }, + { + "id": 31, + "seek": 8210, + "start": 109.92, + "end": 110.86, + "text": " Pour autant, je sache.", + "tokens": [ + 51738, + 8732, + 34081, + 11, + 1506, + 262, + 6000, + 13, + 51808 + ], + "temperature": 0.0, + "avg_logprob": -0.45919205838401844, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066800743341446, + "confidence": 0.6, + "words": [ + { + "text": "Pour", + "start": 109.92, + "end": 110.1, + "confidence": 0.486 + }, + { + "text": "autant,", + "start": 110.1, + "end": 110.28, + "confidence": 0.746 + }, + { + "text": "je", + "start": 110.42, + "end": 110.48, + "confidence": 0.932 + }, + { + "text": "sache.", + "start": 110.48, + "end": 110.86, + "confidence": 0.479 + } + ] + }, + { + "id": 32, + "seek": 11098, + "start": 111.04, + "end": 115.0, + "text": " Il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 50364, + 4416, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13, + 50568 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.822, + "words": [ + { + "text": "Il", + "start": 111.04, + "end": 111.2, + "confidence": 0.905 + }, + { + "text": "y", + "start": 111.2, + "end": 111.3, + "confidence": 0.934 + }, + { + "text": "a", + "start": 111.3, + "end": 111.56, + "confidence": 0.894 + }, + { + "text": "eu", + "start": 111.56, + "end": 111.6, + "confidence": 0.839 + }, + { + "text": "plein", + "start": 111.6, + "end": 111.88, + "confidence": 0.774 + }, + { + "text": "de", + 
"start": 111.88, + "end": 112.12, + "confidence": 0.944 + }, + { + "text": "discussions", + "start": 112.12, + "end": 112.6, + "confidence": 0.681 + }, + { + "text": "autour", + "start": 112.6, + "end": 113.04, + "confidence": 0.978 + }, + { + "text": "de", + "start": 113.04, + "end": 113.48, + "confidence": 0.974 + }, + { + "text": "la", + "start": 113.48, + "end": 113.56, + "confidence": 0.967 + }, + { + "text": "voiture", + "start": 113.56, + "end": 113.88, + "confidence": 0.983 + }, + { + "text": "ou", + "start": 113.88, + "end": 114.14, + "confidence": 0.661 + }, + { + "text": "même", + "start": 114.14, + "end": 114.34, + "confidence": 0.99 + }, + { + "text": "du", + "start": 114.34, + "end": 114.64, + "confidence": 0.289 + }, + { + "text": "téléphone.", + "start": 114.64, + "end": 115.0, + "confidence": 0.986 + } + ] + }, + { + "id": 33, + "seek": 11098, + "start": 115.52, + "end": 119.51, + "text": " Mais, la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même", + "tokens": [ + 50568, + 6313, + 11, + 635, + 27998, + 1288, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 11, + 5926, + 476, + 319, + 7108, + 297, + 6, + 268, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 50790 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.702, + "words": [ + { + "text": "Mais,", + "start": 115.52, + "end": 115.82, + "confidence": 0.984 + }, + { + "text": "la", + "start": 115.92, + "end": 116.0, + "confidence": 0.961 + }, + { + "text": "dépense", + "start": 116.0, + "end": 116.42, + "confidence": 0.616 + }, + { + "text": "n'était", + "start": 116.42, + "end": 116.82, + "confidence": 0.956 + }, + { + "text": "pas", + "start": 116.82, + "end": 117.04, + "confidence": 0.994 + }, + { + "text": "du", + "start": 117.04, + "end": 117.2, + "confidence": 0.958 + }, + { + "text": "même", + "start": 117.2, + "end": 117.38, + 
"confidence": 0.929 + }, + { + "text": "mort,", + "start": 117.38, + "end": 117.58, + "confidence": 0.685 + }, + { + "text": "donc", + "start": 117.7, + "end": 118.02, + "confidence": 0.931 + }, + { + "text": "le", + "start": 118.02, + "end": 118.34, + "confidence": 0.95 + }, + { + "text": "rejet", + "start": 118.34, + "end": 118.64, + "confidence": 0.57 + }, + { + "text": "n'en", + "start": 118.64, + "end": 118.82, + "confidence": 0.429 + }, + { + "text": "plus", + "start": 118.82, + "end": 118.96, + "confidence": 0.056 + }, + { + "text": "n'était", + "start": 118.96, + "end": 119.16, + "confidence": 0.972 + }, + { + "text": "pas", + "start": 119.16, + "end": 119.32, + "confidence": 0.993 + }, + { + "text": "du", + "start": 119.32, + "end": 119.42, + "confidence": 0.821 + }, + { + "text": "même", + "start": 119.42, + "end": 119.51, + "confidence": 0.789 + } + ] + }, + { + "id": 34, + "seek": 11098, + "start": 119.51, + "end": 119.72, + "text": " mort.", + "tokens": [ + 50790, + 6599, + 13, + 50840 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.644, + "words": [ + { + "text": "mort.", + "start": 119.51, + "end": 119.72, + "confidence": 0.644 + } + ] + }, + { + "id": 35, + "seek": 11098, + "start": 120.04, + "end": 121.24, + "text": " On peut adorer sa bagnure.", + "tokens": [ + 50840, + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 77, + 540, + 13, + 50890 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.614, + "words": [ + { + "text": "On", + "start": 120.04, + "end": 120.22, + "confidence": 0.981 + }, + { + "text": "peut", + "start": 120.22, + "end": 120.36, + "confidence": 0.988 + }, + { + "text": "adorer", + "start": 120.36, + "end": 120.7, + "confidence": 0.858 + }, + { + "text": "sa", + "start": 120.7, + "end": 120.88, + 
"confidence": 0.918 + }, + { + "text": "bagnure.", + "start": 120.88, + "end": 121.24, + "confidence": 0.314 + } + ] + }, + { + "id": 36, + "seek": 11098, + "start": 121.38, + "end": 123.06, + "text": " On a par besoin pour plein de choses.", + "tokens": [ + 50890, + 1282, + 257, + 971, + 19207, + 2016, + 21088, + 368, + 14488, + 13, + 50972 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.721, + "words": [ + { + "text": "On", + "start": 121.38, + "end": 121.56, + "confidence": 0.896 + }, + { + "text": "a", + "start": 121.56, + "end": 121.64, + "confidence": 0.653 + }, + { + "text": "par", + "start": 121.64, + "end": 121.8, + "confidence": 0.161 + }, + { + "text": "besoin", + "start": 121.8, + "end": 122.12, + "confidence": 0.947 + }, + { + "text": "pour", + "start": 122.12, + "end": 122.5, + "confidence": 0.976 + }, + { + "text": "plein", + "start": 122.5, + "end": 122.72, + "confidence": 0.858 + }, + { + "text": "de", + "start": 122.72, + "end": 122.8, + "confidence": 0.993 + }, + { + "text": "choses.", + "start": 122.8, + "end": 123.06, + "confidence": 0.988 + } + ] + }, + { + "id": 37, + "seek": 11098, + "start": 123.36, + "end": 126.46, + "text": " Et là, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 50972, + 3790, + 3684, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13, + 51142 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.835, + "words": [ + { + "text": "Et", + "start": 123.36, + "end": 123.5, + "confidence": 0.606 + }, + { + "text": "là,", + "start": 123.5, + "end": 123.76, + "confidence": 0.499 + }, + { + "text": "le", + "start": 123.82, + "end": 124.06, + "confidence": 0.986 + }, + { + "text": "soir,", + "start": 124.06, + "end": 124.42, + 
"confidence": 0.971 + }, + { + "text": "quand", + "start": 124.84, + "end": 124.96, + "confidence": 0.79 + }, + { + "text": "on", + "start": 124.96, + "end": 125.06, + "confidence": 0.988 + }, + { + "text": "va", + "start": 125.06, + "end": 125.18, + "confidence": 0.967 + }, + { + "text": "se", + "start": 125.18, + "end": 125.26, + "confidence": 0.868 + }, + { + "text": "coucher,", + "start": 125.26, + "end": 125.6, + "confidence": 0.803 + }, + { + "text": "on", + "start": 125.82, + "end": 126.06, + "confidence": 0.98 + }, + { + "text": "la", + "start": 126.06, + "end": 126.2, + "confidence": 0.81 + }, + { + "text": "laisse.", + "start": 126.2, + "end": 126.46, + "confidence": 0.984 + } + ] + }, + { + "id": 38, + "seek": 11098, + "start": 127.06, + "end": 130.1, + "text": " On la pade en la main quand on est colis, qu'on n'a même pas au chiot.", + "tokens": [ + 51142, + 1282, + 635, + 280, + 762, + 465, + 635, + 2135, + 6932, + 322, + 871, + 1173, + 271, + 11, + 421, + 6, + 266, + 297, + 6, + 64, + 5698, + 1736, + 1609, + 417, + 6471, + 13, + 51334 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.543, + "words": [ + { + "text": "On", + "start": 127.06, + "end": 127.32, + "confidence": 0.606 + }, + { + "text": "la", + "start": 127.32, + "end": 127.5, + "confidence": 0.349 + }, + { + "text": "pade", + "start": 127.5, + "end": 127.72, + "confidence": 0.181 + }, + { + "text": "en", + "start": 127.72, + "end": 127.88, + "confidence": 0.774 + }, + { + "text": "la", + "start": 127.88, + "end": 128.04, + "confidence": 0.796 + }, + { + "text": "main", + "start": 128.04, + "end": 128.3, + "confidence": 0.971 + }, + { + "text": "quand", + "start": 128.3, + "end": 128.46, + "confidence": 0.483 + }, + { + "text": "on", + "start": 128.46, + "end": 128.62, + "confidence": 0.994 + }, + { + "text": "est", + "start": 128.62, + "end": 128.74, + "confidence": 0.708 + 
}, + { + "text": "colis,", + "start": 128.74, + "end": 129.12, + "confidence": 0.503 + }, + { + "text": "qu'on", + "start": 129.22, + "end": 129.32, + "confidence": 0.776 + }, + { + "text": "n'a", + "start": 129.32, + "end": 129.42, + "confidence": 0.441 + }, + { + "text": "même", + "start": 129.42, + "end": 129.56, + "confidence": 0.333 + }, + { + "text": "pas", + "start": 129.56, + "end": 129.74, + "confidence": 1.0 + }, + { + "text": "au", + "start": 129.74, + "end": 129.88, + "confidence": 0.638 + }, + { + "text": "chiot.", + "start": 129.88, + "end": 130.1, + "confidence": 0.514 + } + ] + }, + { + "id": 39, + "seek": 11098, + "start": 130.94, + "end": 135.32, + "text": " On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure", + "tokens": [ + 51334, + 1282, + 45913, + 7418, + 1136, + 936, + 15797, + 971, + 1872, + 275, + 423, + 1956, + 2678, + 84, + 494, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251, + 30027, + 51574 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.655, + "words": [ + { + "text": "On", + "start": 130.94, + "end": 131.04, + "confidence": 0.99 + }, + { + "text": "pouvait", + "start": 131.04, + "end": 131.28, + "confidence": 0.588 + }, + { + "text": "être", + "start": 131.28, + "end": 131.6, + "confidence": 0.443 + }, + { + "text": "émervé", + "start": 131.6, + "end": 132.24, + "confidence": 0.725 + }, + { + "text": "par", + "start": 132.24, + "end": 132.5, + "confidence": 0.85 + }, + { + "text": "son", + "start": 132.5, + "end": 132.7, + "confidence": 0.443 + }, + { + "text": "mome", + "start": 132.7, + "end": 133.04, + "confidence": 0.255 + }, + { + "text": "qui", + "start": 133.04, + "end": 133.3, + "confidence": 0.911 + }, + { + "text": "occupeait", + "start": 133.3, + "end": 133.74, + "confidence": 0.638 + }, + { + "text": "la", + "start": 133.74, + "end": 133.86, + "confidence": 
0.807 + }, + { + "text": "ligne", + "start": 133.86, + "end": 134.02, + "confidence": 0.974 + }, + { + "text": "de", + "start": 134.02, + "end": 134.24, + "confidence": 0.929 + }, + { + "text": "téléphone", + "start": 134.24, + "end": 134.56, + "confidence": 0.978 + }, + { + "text": "pendant", + "start": 134.56, + "end": 134.92, + "confidence": 0.899 + }, + { + "text": "une", + "start": 134.92, + "end": 135.18, + "confidence": 0.814 + }, + { + "text": "heure", + "start": 135.18, + "end": 135.32, + "confidence": 0.482 + } + ] + }, + { + "id": 40, + "seek": 11098, + "start": 135.32, + "end": 137.04, + "text": " chaque soir pour discuter avec un copain.", + "tokens": [ + 51574, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.44286312419138135, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.14988036453723907, + "confidence": 0.955, + "words": [ + { + "text": "chaque", + "start": 135.32, + "end": 135.6, + "confidence": 0.99 + }, + { + "text": "soir", + "start": 135.6, + "end": 135.8, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 135.8, + "end": 136.0, + "confidence": 0.981 + }, + { + "text": "discuter", + "start": 136.0, + "end": 136.34, + "confidence": 0.882 + }, + { + "text": "avec", + "start": 136.34, + "end": 136.5, + "confidence": 0.993 + }, + { + "text": "un", + "start": 136.5, + "end": 136.62, + "confidence": 0.967 + }, + { + "text": "copain.", + "start": 136.62, + "end": 137.04, + "confidence": 0.954 + } + ] + }, + { + "id": 41, + "seek": 13698, + "start": 137.32, + "end": 141.84, + "text": " Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui.", + "tokens": [ + 50376, + 6313, + 2788, + 408, + 725, + 37227, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 5698, + 275, + 423, + 14023, + 6, + 10556, + 13, + 50606 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, 
+ "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.735, + "words": [ + { + "text": "Mais", + "start": 137.32, + "end": 137.52, + "confidence": 0.984 + }, + { + "text": "ça", + "start": 137.52, + "end": 137.7, + "confidence": 0.877 + }, + { + "text": "ne", + "start": 137.7, + "end": 137.8, + "confidence": 0.671 + }, + { + "text": "ressemble", + "start": 137.8, + "end": 138.22, + "confidence": 0.731 + }, + { + "text": "pas", + "start": 138.22, + "end": 138.76, + "confidence": 0.532 + }, + { + "text": "à", + "start": 138.76, + "end": 138.94, + "confidence": 0.98 + }, + { + "text": "ce", + "start": 138.94, + "end": 139.08, + "confidence": 0.574 + }, + { + "text": "qu'on", + "start": 139.08, + "end": 139.22, + "confidence": 0.96 + }, + { + "text": "peut", + "start": 139.22, + "end": 139.38, + "confidence": 0.548 + }, + { + "text": "ressentir", + "start": 139.38, + "end": 140.06, + "confidence": 0.888 + }, + { + "text": "à", + "start": 140.06, + "end": 140.26, + "confidence": 0.434 + }, + { + "text": "voir", + "start": 140.26, + "end": 140.54, + "confidence": 0.951 + }, + { + "text": "même", + "start": 140.54, + "end": 140.96, + "confidence": 0.297 + }, + { + "text": "mome", + "start": 140.96, + "end": 141.22, + "confidence": 0.495 + }, + { + "text": "aujourd'hui.", + "start": 141.22, + "end": 141.84, + "confidence": 0.961 + } + ] + }, + { + "id": 42, + "seek": 13698, + "start": 141.96, + "end": 145.86, + "text": " Continuellement avec son smartphone dans la main, comme c'était une sorte de estimateur", + "tokens": [ + 50606, + 14674, + 31816, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 8017, + 15540, + 50810 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.663, + "words": [ + { + "text": "Continuellement", + "start": 141.96, + "end": 142.84, 
+ "confidence": 0.843 + }, + { + "text": "avec", + "start": 142.84, + "end": 143.24, + "confidence": 0.642 + }, + { + "text": "son", + "start": 143.24, + "end": 143.42, + "confidence": 0.865 + }, + { + "text": "smartphone", + "start": 143.42, + "end": 143.72, + "confidence": 0.447 + }, + { + "text": "dans", + "start": 143.72, + "end": 143.92, + "confidence": 0.59 + }, + { + "text": "la", + "start": 143.92, + "end": 144.0, + "confidence": 0.974 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.3, + "confidence": 0.995 + }, + { + "text": "comme", + "start": 144.42, + "end": 144.56, + "confidence": 0.895 + }, + { + "text": "c'était", + "start": 144.56, + "end": 144.84, + "confidence": 0.595 + }, + { + "text": "une", + "start": 144.84, + "end": 144.96, + "confidence": 0.973 + }, + { + "text": "sorte", + "start": 144.96, + "end": 145.1, + "confidence": 0.655 + }, + { + "text": "de", + "start": 145.1, + "end": 145.4, + "confidence": 0.314 + }, + { + "text": "estimateur", + "start": 145.4, + "end": 145.86, + "confidence": 0.489 + } + ] + }, + { + "id": 43, + "seek": 13698, + "start": 145.96, + "end": 149.0, + "text": " extère de tomber de lâcher à l'éantrénée, ça m'a eu immédiate.", + "tokens": [ + 50810, + 1279, + 4212, + 368, + 2916, + 607, + 368, + 48835, + 6759, + 1531, + 287, + 6, + 526, + 394, + 81, + 3516, + 3856, + 11, + 2788, + 275, + 6, + 64, + 2228, + 3397, + 526, + 4504, + 473, + 13, + 50954 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.415, + "words": [ + { + "text": "extère", + "start": 145.96, + "end": 146.4, + "confidence": 0.373 + }, + { + "text": "de", + "start": 146.4, + "end": 146.54, + "confidence": 0.254 + }, + { + "text": "tomber", + "start": 146.54, + "end": 146.7, + "confidence": 0.102 + }, + { + "text": "de", + "start": 146.7, + "end": 146.9, + "confidence": 0.455 + }, + { + "text": "lâcher", + "start": 146.9, + "end": 
147.34, + "confidence": 0.751 + }, + { + "text": "à", + "start": 147.34, + "end": 147.46, + "confidence": 0.444 + }, + { + "text": "l'éantrénée,", + "start": 147.46, + "end": 147.9, + "confidence": 0.491 + }, + { + "text": "ça", + "start": 148.0, + "end": 148.02, + "confidence": 0.798 + }, + { + "text": "m'a", + "start": 148.02, + "end": 148.26, + "confidence": 0.529 + }, + { + "text": "eu", + "start": 148.26, + "end": 148.44, + "confidence": 0.136 + }, + { + "text": "immédiate.", + "start": 148.44, + "end": 149.0, + "confidence": 0.502 + } + ] + }, + { + "id": 44, + "seek": 13698, + "start": 149.08, + "end": 151.98, + "text": " Bon, je dis ça pour le mome, mais évidemment, va là, bon aussi.", + "tokens": [ + 50954, + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 423, + 11, + 2420, + 24724, + 11, + 2773, + 3684, + 11, + 4428, + 6212, + 13, + 51126 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.564, + "words": [ + { + "text": "Bon,", + "start": 149.08, + "end": 149.28, + "confidence": 0.375 + }, + { + "text": "je", + "start": 149.32, + "end": 149.4, + "confidence": 0.468 + }, + { + "text": "dis", + "start": 149.4, + "end": 149.46, + "confidence": 0.321 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.952 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.8, + "confidence": 0.964 + }, + { + "text": "le", + "start": 149.8, + "end": 149.92, + "confidence": 0.992 + }, + { + "text": "mome,", + "start": 149.92, + "end": 150.12, + "confidence": 0.621 + }, + { + "text": "mais", + "start": 150.46, + "end": 150.52, + "confidence": 0.692 + }, + { + "text": "évidemment,", + "start": 150.52, + "end": 151.12, + "confidence": 0.781 + }, + { + "text": "va", + "start": 151.26, + "end": 151.36, + "confidence": 0.39 + }, + { + "text": "là,", + "start": 151.36, + "end": 151.46, + "confidence": 0.591 + }, + { + "text": 
"bon", + "start": 151.6, + "end": 151.68, + "confidence": 0.827 + }, + { + "text": "aussi.", + "start": 151.68, + "end": 151.98, + "confidence": 0.163 + } + ] + }, + { + "id": 45, + "seek": 13698, + "start": 152.64, + "end": 154.64, + "text": " Donc, rapport immédiate d'accord.", + "tokens": [ + 51126, + 7477, + 11, + 18018, + 3397, + 526, + 4504, + 473, + 274, + 6, + 19947, + 13, + 51248 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.86, + "words": [ + { + "text": "Donc,", + "start": 152.64, + "end": 153.08, + "confidence": 0.982 + }, + { + "text": "rapport", + "start": 153.16, + "end": 153.58, + "confidence": 0.979 + }, + { + "text": "immédiate", + "start": 153.58, + "end": 154.28, + "confidence": 0.839 + }, + { + "text": "d'accord.", + "start": 154.28, + "end": 154.64, + "confidence": 0.815 + } + ] + }, + { + "id": 46, + "seek": 13698, + "start": 155.66, + "end": 158.36, + "text": " Mais pourquoi, à ton impression qu'on en sortira, j'amé?", + "tokens": [ + 51248, + 6313, + 19934, + 11, + 1531, + 2952, + 9995, + 421, + 6, + 266, + 465, + 26906, + 64, + 11, + 361, + 6, + 335, + 526, + 30, + 51440 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.709, + "words": [ + { + "text": "Mais", + "start": 155.66, + "end": 155.88, + "confidence": 0.984 + }, + { + "text": "pourquoi,", + "start": 155.88, + "end": 156.28, + "confidence": 0.898 + }, + { + "text": "à", + "start": 156.38, + "end": 156.58, + "confidence": 0.838 + }, + { + "text": "ton", + "start": 156.58, + "end": 156.7, + "confidence": 0.952 + }, + { + "text": "impression", + "start": 156.7, + "end": 157.06, + "confidence": 0.939 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.28, + "confidence": 0.889 + }, + { + "text": "en", + "start": 157.28, + "end": 157.4, + 
"confidence": 0.801 + }, + { + "text": "sortira,", + "start": 157.4, + "end": 157.88, + "confidence": 0.577 + }, + { + "text": "j'amé?", + "start": 157.96, + "end": 158.36, + "confidence": 0.463 + } + ] + }, + { + "id": 47, + "seek": 13698, + "start": 159.16, + "end": 163.32, + "text": " Et puis, il faut en remettre la faute sur les gens qui ont créé cette critique merveilleux", + "tokens": [ + 51440, + 3790, + 9093, + 11, + 1930, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 5550, + 25673, + 3551, + 303, + 3409, + 2449, + 51678 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.715, + "words": [ + { + "text": "Et", + "start": 159.16, + "end": 159.32, + "confidence": 0.83 + }, + { + "text": "puis,", + "start": 159.32, + "end": 159.46, + "confidence": 0.495 + }, + { + "text": "il", + "start": 159.62, + "end": 159.64, + "confidence": 0.594 + }, + { + "text": "faut", + "start": 159.64, + "end": 159.66, + "confidence": 0.951 + }, + { + "text": "en", + "start": 159.66, + "end": 159.78, + "confidence": 0.937 + }, + { + "text": "remettre", + "start": 159.78, + "end": 160.14, + "confidence": 0.984 + }, + { + "text": "la", + "start": 160.14, + "end": 160.42, + "confidence": 0.611 + }, + { + "text": "faute", + "start": 160.42, + "end": 160.68, + "confidence": 0.591 + }, + { + "text": "sur", + "start": 160.68, + "end": 160.98, + "confidence": 0.966 + }, + { + "text": "les", + "start": 160.98, + "end": 161.24, + "confidence": 0.83 + }, + { + "text": "gens", + "start": 161.24, + "end": 161.46, + "confidence": 0.987 + }, + { + "text": "qui", + "start": 161.46, + "end": 161.6, + "confidence": 0.98 + }, + { + "text": "ont", + "start": 161.6, + "end": 161.68, + "confidence": 0.951 + }, + { + "text": "créé", + "start": 161.68, + "end": 162.3, + "confidence": 0.95 + }, + { + "text": "cette", + "start": 
162.3, + "end": 162.48, + "confidence": 0.909 + }, + { + "text": "critique", + "start": 162.48, + "end": 162.72, + "confidence": 0.033 + }, + { + "text": "merveilleux", + "start": 162.72, + "end": 163.32, + "confidence": 0.841 + } + ] + }, + { + "id": 48, + "seek": 13698, + "start": 163.32, + "end": 165.34, + "text": " et diabolique et diabolique par que merveilleux.", + "tokens": [ + 51678, + 1030, + 1026, + 14923, + 1925, + 1030, + 1026, + 14923, + 1925, + 971, + 631, + 3551, + 303, + 3409, + 2449, + 13, + 51778 + ], + "temperature": 0.0, + "avg_logprob": -0.5335124224081806, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.10962729901075363, + "confidence": 0.709, + "words": [ + { + "text": "et", + "start": 163.32, + "end": 163.44, + "confidence": 0.946 + }, + { + "text": "diabolique", + "start": 163.44, + "end": 163.86, + "confidence": 0.574 + }, + { + "text": "et", + "start": 163.86, + "end": 163.94, + "confidence": 0.239 + }, + { + "text": "diabolique", + "start": 163.94, + "end": 164.38, + "confidence": 0.893 + }, + { + "text": "par", + "start": 164.38, + "end": 164.62, + "confidence": 0.548 + }, + { + "text": "que", + "start": 164.62, + "end": 164.82, + "confidence": 0.529 + }, + { + "text": "merveilleux.", + "start": 164.82, + "end": 165.34, + "confidence": 0.981 + } + ] + }, + { + "id": 49, + "seek": 16526, + "start": 166.9, + "end": 168.8, + "text": " Les économistes parlent de dépendance du santé.", + "tokens": [ + 50410, + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 30068, + 13, + 50542 + ], + "temperature": 0.0, + "avg_logprob": -0.6641062498092651, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.18914750218391418, + "confidence": 0.765, + "words": [ + { + "text": "Les", + "start": 166.9, + "end": 167.06, + "confidence": 0.699 + }, + { + "text": "économistes", + "start": 167.06, + "end": 167.52, + "confidence": 0.916 + }, + { + "text": "parlent", + "start": 167.52, + "end": 167.78, + 
"confidence": 0.794 + }, + { + "text": "de", + "start": 167.78, + "end": 167.84, + "confidence": 0.813 + }, + { + "text": "dépendance", + "start": 167.84, + "end": 168.36, + "confidence": 0.699 + }, + { + "text": "du", + "start": 168.36, + "end": 168.5, + "confidence": 0.963 + }, + { + "text": "santé.", + "start": 168.5, + "end": 168.8, + "confidence": 0.487 + } + ] + }, + { + "id": 50, + "seek": 16526, + "start": 168.82, + "end": 173.42, + "text": " Ces vidéos, en fait, on est un santé qui a été étabis, un soit mon termine, en marchand dessus,", + "tokens": [ + 50542, + 28414, + 25417, + 11, + 465, + 3887, + 11, + 322, + 871, + 517, + 30068, + 1956, + 257, + 8862, + 4823, + 455, + 271, + 11, + 517, + 12703, + 1108, + 1433, + 533, + 11, + 465, + 8368, + 474, + 30677, + 11, + 50784 + ], + "temperature": 0.0, + "avg_logprob": -0.6641062498092651, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.18914750218391418, + "confidence": 0.474, + "words": [ + { + "text": "Ces", + "start": 168.82, + "end": 169.14, + "confidence": 0.439 + }, + { + "text": "vidéos,", + "start": 169.14, + "end": 169.4, + "confidence": 0.459 + }, + { + "text": "en", + "start": 169.44, + "end": 169.64, + "confidence": 0.747 + }, + { + "text": "fait,", + "start": 169.64, + "end": 169.66, + "confidence": 0.976 + }, + { + "text": "on", + "start": 169.72, + "end": 169.74, + "confidence": 0.302 + }, + { + "text": "est", + "start": 169.74, + "end": 169.9, + "confidence": 0.815 + }, + { + "text": "un", + "start": 169.9, + "end": 170.48, + "confidence": 0.364 + }, + { + "text": "santé", + "start": 170.48, + "end": 170.8, + "confidence": 0.951 + }, + { + "text": "qui", + "start": 170.8, + "end": 170.92, + "confidence": 0.836 + }, + { + "text": "a", + "start": 170.92, + "end": 171.02, + "confidence": 0.821 + }, + { + "text": "été", + "start": 171.02, + "end": 171.12, + "confidence": 0.994 + }, + { + "text": "étabis,", + "start": 171.12, + "end": 171.5, + "confidence": 0.342 + }, + { + "text": 
"un", + "start": 171.76, + "end": 171.9, + "confidence": 0.233 + }, + { + "text": "soit", + "start": 171.9, + "end": 172.16, + "confidence": 0.311 + }, + { + "text": "mon", + "start": 172.16, + "end": 172.36, + "confidence": 0.205 + }, + { + "text": "termine,", + "start": 172.36, + "end": 172.72, + "confidence": 0.4 + }, + { + "text": "en", + "start": 172.8, + "end": 172.82, + "confidence": 0.31 + }, + { + "text": "marchand", + "start": 172.82, + "end": 173.14, + "confidence": 0.816 + }, + { + "text": "dessus,", + "start": 173.14, + "end": 173.42, + "confidence": 0.205 + } + ] + }, + { + "id": 51, + "seek": 16526, + "start": 173.86, + "end": 177.42, + "text": " soit des finissants débordes, des finissants, une signalétique.", + "tokens": [ + 50784, + 12703, + 730, + 962, + 891, + 1719, + 36529, + 765, + 279, + 11, + 730, + 962, + 891, + 1719, + 11, + 2251, + 6358, + 42379, + 13, + 50974 + ], + "temperature": 0.0, + "avg_logprob": -0.6641062498092651, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.18914750218391418, + "confidence": 0.582, + "words": [ + { + "text": "soit", + "start": 173.86, + "end": 174.4, + "confidence": 0.993 + }, + { + "text": "des", + "start": 174.4, + "end": 175.2, + "confidence": 0.762 + }, + { + "text": "finissants", + "start": 175.2, + "end": 175.58, + "confidence": 0.436 + }, + { + "text": "débordes,", + "start": 175.58, + "end": 175.98, + "confidence": 0.467 + }, + { + "text": "des", + "start": 176.14, + "end": 176.18, + "confidence": 0.22 + }, + { + "text": "finissants,", + "start": 176.18, + "end": 176.62, + "confidence": 0.961 + }, + { + "text": "une", + "start": 176.66, + "end": 176.84, + "confidence": 0.748 + }, + { + "text": "signalétique.", + "start": 176.84, + "end": 177.42, + "confidence": 0.567 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/verbose.cpu/accurate.auto_bonjour_vous_allez_bien.mp3.stdout 
b/tests/expected/verbose.cpu/accurate.auto_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..532d5eaf637389f5cfdbecc58823d2ccd29cfacb --- /dev/null +++ b/tests/expected/verbose.cpu/accurate.auto_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,11 @@ +[00:00.460 --> 00:00.780] Боже +[00:00.780 --> 00:01.980] улыл! +[00:02.020 --> 00:02.260] Таков +[00:02.260 --> 00:02.400] уже +[00:02.400 --> 00:02.460] на +[00:02.460 --> 00:02.800] меня! +[00:32.980 --> 00:33.280] Боже +[00:33.280 --> 00:33.800] улыл! +[00:34.400 --> 00:34.800] Эскому +[00:34.800 --> 00:34.960] за +[00:34.960 --> 00:35.260] меня! diff --git a/tests/expected/verbose.cpu/accurate.fr_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose.cpu/accurate.fr_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..bbe59141c7bb85cc4567ff0ac51949b691aea753 --- /dev/null +++ b/tests/expected/verbose.cpu/accurate.fr_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,13 @@ +[00:00.460 --> 00:00.680] Bonjour! +[00:01.860 --> 00:02.140] Est-ce +[00:02.140 --> 00:02.180] que +[00:02.180 --> 00:02.340] vous +[00:02.340 --> 00:02.480] allez +[00:02.480 --> 00:02.800] bien? +[00:32.980 --> 00:33.160] Bonjour! +[00:34.400 --> 00:34.610] Bonjour! +[00:34.610 --> 00:34.660] Est-ce +[00:34.660 --> 00:34.720] que +[00:34.720 --> 00:34.860] vous +[00:34.860 --> 00:35.000] allez +[00:35.000 --> 00:35.340] bien? diff --git a/tests/expected/verbose.cpu/efficient.auto_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose.cpu/efficient.auto_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..532d5eaf637389f5cfdbecc58823d2ccd29cfacb --- /dev/null +++ b/tests/expected/verbose.cpu/efficient.auto_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,11 @@ +[00:00.460 --> 00:00.780] Боже +[00:00.780 --> 00:01.980] улыл! 
+[00:02.020 --> 00:02.260] Таков +[00:02.260 --> 00:02.400] уже +[00:02.400 --> 00:02.460] на +[00:02.460 --> 00:02.800] меня! +[00:32.980 --> 00:33.280] Боже +[00:33.280 --> 00:33.800] улыл! +[00:34.400 --> 00:34.800] Эскому +[00:34.800 --> 00:34.960] за +[00:34.960 --> 00:35.260] меня! diff --git a/tests/expected/verbose.cpu/efficient.fr_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose.cpu/efficient.fr_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..bbe59141c7bb85cc4567ff0ac51949b691aea753 --- /dev/null +++ b/tests/expected/verbose.cpu/efficient.fr_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,13 @@ +[00:00.460 --> 00:00.680] Bonjour! +[00:01.860 --> 00:02.140] Est-ce +[00:02.140 --> 00:02.180] que +[00:02.180 --> 00:02.340] vous +[00:02.340 --> 00:02.480] allez +[00:02.480 --> 00:02.800] bien? +[00:32.980 --> 00:33.160] Bonjour! +[00:34.400 --> 00:34.610] Bonjour! +[00:34.610 --> 00:34.660] Est-ce +[00:34.660 --> 00:34.720] que +[00:34.720 --> 00:34.860] vous +[00:34.860 --> 00:35.000] allez +[00:35.000 --> 00:35.340] bien? diff --git a/tests/expected/verbose.cpu/hf_bonjour.wav.stdout b/tests/expected/verbose.cpu/hf_bonjour.wav.stdout new file mode 100644 index 0000000000000000000000000000000000000000..99635535fd87302067c97ce13d42f2988dacdd83 --- /dev/null +++ b/tests/expected/verbose.cpu/hf_bonjour.wav.stdout @@ -0,0 +1,3 @@ +Detecting language using up to the first 30 seconds. Use `--language` to specify the language +Detected language: French +[00:00.120 --> 00:00.640] Bonjour. 
diff --git a/tests/expected/verbose/accurate.auto_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose/accurate.auto_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..6bfee4b97e33192bc46f69994bddcd817c220226 --- /dev/null +++ b/tests/expected/verbose/accurate.auto_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,10 @@ +Detecting language using up to the first 30 seconds. Use `--language` to specify the language +Detected language: Russian +[00:00.440 --> 00:00.860] Боже +[00:00.860 --> 00:01.880] улыл! +[00:01.880 --> 00:02.300] Эскому +[00:02.300 --> 00:03.140] зарегиан! +[00:32.980 --> 00:33.360] Боже +[00:33.360 --> 00:34.020] улыл! +[00:34.420 --> 00:34.840] Эскому +[00:34.840 --> 00:35.720] зарегиан! diff --git a/tests/expected/verbose/accurate.fr_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose/accurate.fr_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..ec0d882ef119c14b9da62584ee57728eab7691fd --- /dev/null +++ b/tests/expected/verbose/accurate.fr_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,11 @@ +[00:00.440 --> 00:01.440] Bonjour ! +[00:01.880 --> 00:02.200] Est-ce +[00:02.200 --> 00:02.240] que +[00:02.240 --> 00:02.360] vous +[00:02.360 --> 00:02.560] allez +[00:02.560 --> 00:03.120] bien ? +[00:32.980 --> 00:33.480] Bonjour ! +[00:34.420 --> 00:34.760] Esque +[00:34.760 --> 00:34.900] vous +[00:34.900 --> 00:35.080] allez +[00:35.080 --> 00:35.480] bien ! diff --git a/tests/expected/verbose/efficient.auto_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose/efficient.auto_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..6bfee4b97e33192bc46f69994bddcd817c220226 --- /dev/null +++ b/tests/expected/verbose/efficient.auto_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,10 @@ +Detecting language using up to the first 30 seconds. 
Use `--language` to specify the language +Detected language: Russian +[00:00.440 --> 00:00.860] Боже +[00:00.860 --> 00:01.880] улыл! +[00:01.880 --> 00:02.300] Эскому +[00:02.300 --> 00:03.140] зарегиан! +[00:32.980 --> 00:33.360] Боже +[00:33.360 --> 00:34.020] улыл! +[00:34.420 --> 00:34.840] Эскому +[00:34.840 --> 00:35.720] зарегиан! diff --git a/tests/expected/verbose/efficient.fr_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose/efficient.fr_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..ec0d882ef119c14b9da62584ee57728eab7691fd --- /dev/null +++ b/tests/expected/verbose/efficient.fr_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,11 @@ +[00:00.440 --> 00:01.440] Bonjour ! +[00:01.880 --> 00:02.200] Est-ce +[00:02.200 --> 00:02.240] que +[00:02.240 --> 00:02.360] vous +[00:02.360 --> 00:02.560] allez +[00:02.560 --> 00:03.120] bien ? +[00:32.980 --> 00:33.480] Bonjour ! +[00:34.420 --> 00:34.760] Esque +[00:34.760 --> 00:34.900] vous +[00:34.900 --> 00:35.080] allez +[00:35.080 --> 00:35.480] bien ! diff --git a/tests/expected/verbose/hf_bonjour.wav.stdout b/tests/expected/verbose/hf_bonjour.wav.stdout new file mode 100644 index 0000000000000000000000000000000000000000..99635535fd87302067c97ce13d42f2988dacdd83 --- /dev/null +++ b/tests/expected/verbose/hf_bonjour.wav.stdout @@ -0,0 +1,3 @@ +Detecting language using up to the first 30 seconds. Use `--language` to specify the language +Detected language: French +[00:00.120 --> 00:00.640] Bonjour. 
diff --git a/tests/expected/verbose/vad_auditok_words.wav.stdout b/tests/expected/verbose/vad_auditok_words.wav.stdout new file mode 100644 index 0000000000000000000000000000000000000000..9d3af7b842f6c5ec97a7c63f0b73168e966f7b98 --- /dev/null +++ b/tests/expected/verbose/vad_auditok_words.wav.stdout @@ -0,0 +1,8 @@ +[00:00.750 --> 00:01.470] settlement, +[00:02.950 --> 00:03.670] Kentucky, +[00:05.770 --> 00:06.290] causing +[00:07.900 --> 00:08.950] damage, +[00:10.900 --> 00:11.700] President, +[00:14.200 --> 00:14.780] expansion, +[00:17.120 --> 00:17.760] hospital, +[00:20.730 --> 00:21.290] devastated. diff --git a/tests/expected/verbose/vad_silero3.0_words.wav.stdout b/tests/expected/verbose/vad_silero3.0_words.wav.stdout new file mode 100644 index 0000000000000000000000000000000000000000..79e9f6107b1e48c0323c7fcfd843f9f3a9bf1be9 --- /dev/null +++ b/tests/expected/verbose/vad_silero3.0_words.wav.stdout @@ -0,0 +1,8 @@ +[00:00.760 --> 00:01.480] settlement, +[00:02.890 --> 00:03.670] Kentucky, +[00:05.710 --> 00:06.270] causing +[00:07.850 --> 00:08.930] damage, +[00:10.940 --> 00:11.700] president, +[00:14.200 --> 00:14.780] expansion, +[00:17.120 --> 00:17.780] hospital, +[00:20.140 --> 00:21.380] devastated. diff --git a/tests/expected/verbose/vad_silero3.1_words.wav.stdout b/tests/expected/verbose/vad_silero3.1_words.wav.stdout new file mode 100644 index 0000000000000000000000000000000000000000..54b0fd0716a5eed94c6a92c3ea372b879cec5401 --- /dev/null +++ b/tests/expected/verbose/vad_silero3.1_words.wav.stdout @@ -0,0 +1,8 @@ +[00:00.760 --> 00:01.480] settlement, +[00:02.920 --> 00:03.660] Kentucky, +[00:05.760 --> 00:06.260] causing +[00:07.850 --> 00:08.940] damage, +[00:10.840 --> 00:11.700] president, +[00:14.190 --> 00:14.770] expansion, +[00:17.130 --> 00:17.750] hospital, +[00:21.200 --> 00:21.380] devastated. 
diff --git a/tests/expected/verbose/vad_words.wav.stdout b/tests/expected/verbose/vad_words.wav.stdout new file mode 100644 index 0000000000000000000000000000000000000000..5dbf58fc48651f530a5a99a116062ab15fe66aeb --- /dev/null +++ b/tests/expected/verbose/vad_words.wav.stdout @@ -0,0 +1,8 @@ +[00:00.760 --> 00:01.460] settlement, +[00:02.900 --> 00:03.680] Kentucky, +[00:05.710 --> 00:06.270] causing +[00:07.890 --> 00:08.940] damage, +[00:10.930 --> 00:11.690] president, +[00:14.070 --> 00:14.770] expansion, +[00:17.140 --> 00:17.780] hospital, +[00:20.730 --> 00:21.370] devastated. diff --git a/tests/json_schema.json b/tests/json_schema.json new file mode 100644 index 0000000000000000000000000000000000000000..c3df000500227a0b937ac14f7f626414b978d77e --- /dev/null +++ b/tests/json_schema.json @@ -0,0 +1,146 @@ +{ + "type": "object", + "properties": { + "text": {"type": "string"}, + "segments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer", "minimum":0}, + "start": {"type": "number", "minimum":0}, + "end": {"type": "number", "minimum":0}, + "text": {"type": "string"}, + "tokens": { + "type": "array", + "items": {"type": "integer", "minimum": 0, "maximum": 51864} + }, + "temperature": {"type": "number", "minimum":0, "maximum":1}, + "avg_logprob": {"type": "number", "maximum":0}, + "compression_ratio": {"type": "number", "minimum":0}, + "no_speech_prob": {"type": "number", "minimum":0, "maximum":1}, + "confidence": {"type": "number", "minimum":0, "maximum":1}, + "words": { + "type": "array", + "items": { + "type": "object", + "properties": { + "text": {"type": "string"}, + "start": {"type": "number", "minimum":0}, + "end": {"type": "number", "minimum":0}, + "confidence": {"type": "number", "minimum":0, "maximum":1} + } + } + } + } + }, + "minItems": 0, + "uniqueItems": true + }, + "language": {"type": "string"}, + "language_probs": { + "type": "object", + "properties": { + "en": {"type": "number", "minimum":0, 
"maximum":1}, + "zh": {"type": "number", "minimum":0, "maximum":1}, + "de": {"type": "number", "minimum":0, "maximum":1}, + "es": {"type": "number", "minimum":0, "maximum":1}, + "ru": {"type": "number", "minimum":0, "maximum":1}, + "ko": {"type": "number", "minimum":0, "maximum":1}, + "fr": {"type": "number", "minimum":0, "maximum":1}, + "ja": {"type": "number", "minimum":0, "maximum":1}, + "pt": {"type": "number", "minimum":0, "maximum":1}, + "tr": {"type": "number", "minimum":0, "maximum":1}, + "pl": {"type": "number", "minimum":0, "maximum":1}, + "ca": {"type": "number", "minimum":0, "maximum":1}, + "nl": {"type": "number", "minimum":0, "maximum":1}, + "ar": {"type": "number", "minimum":0, "maximum":1}, + "sv": {"type": "number", "minimum":0, "maximum":1}, + "it": {"type": "number", "minimum":0, "maximum":1}, + "id": {"type": "number", "minimum":0, "maximum":1}, + "hi": {"type": "number", "minimum":0, "maximum":1}, + "fi": {"type": "number", "minimum":0, "maximum":1}, + "vi": {"type": "number", "minimum":0, "maximum":1}, + "he": {"type": "number", "minimum":0, "maximum":1}, + "uk": {"type": "number", "minimum":0, "maximum":1}, + "el": {"type": "number", "minimum":0, "maximum":1}, + "ms": {"type": "number", "minimum":0, "maximum":1}, + "cs": {"type": "number", "minimum":0, "maximum":1}, + "ro": {"type": "number", "minimum":0, "maximum":1}, + "da": {"type": "number", "minimum":0, "maximum":1}, + "hu": {"type": "number", "minimum":0, "maximum":1}, + "ta": {"type": "number", "minimum":0, "maximum":1}, + "no": {"type": "number", "minimum":0, "maximum":1}, + "th": {"type": "number", "minimum":0, "maximum":1}, + "ur": {"type": "number", "minimum":0, "maximum":1}, + "hr": {"type": "number", "minimum":0, "maximum":1}, + "bg": {"type": "number", "minimum":0, "maximum":1}, + "lt": {"type": "number", "minimum":0, "maximum":1}, + "la": {"type": "number", "minimum":0, "maximum":1}, + "mi": {"type": "number", "minimum":0, "maximum":1}, + "ml": {"type": "number", "minimum":0, 
"maximum":1}, + "cy": {"type": "number", "minimum":0, "maximum":1}, + "sk": {"type": "number", "minimum":0, "maximum":1}, + "te": {"type": "number", "minimum":0, "maximum":1}, + "fa": {"type": "number", "minimum":0, "maximum":1}, + "lv": {"type": "number", "minimum":0, "maximum":1}, + "bn": {"type": "number", "minimum":0, "maximum":1}, + "sr": {"type": "number", "minimum":0, "maximum":1}, + "az": {"type": "number", "minimum":0, "maximum":1}, + "sl": {"type": "number", "minimum":0, "maximum":1}, + "kn": {"type": "number", "minimum":0, "maximum":1}, + "et": {"type": "number", "minimum":0, "maximum":1}, + "mk": {"type": "number", "minimum":0, "maximum":1}, + "br": {"type": "number", "minimum":0, "maximum":1}, + "eu": {"type": "number", "minimum":0, "maximum":1}, + "is": {"type": "number", "minimum":0, "maximum":1}, + "hy": {"type": "number", "minimum":0, "maximum":1}, + "ne": {"type": "number", "minimum":0, "maximum":1}, + "mn": {"type": "number", "minimum":0, "maximum":1}, + "bs": {"type": "number", "minimum":0, "maximum":1}, + "kk": {"type": "number", "minimum":0, "maximum":1}, + "sq": {"type": "number", "minimum":0, "maximum":1}, + "sw": {"type": "number", "minimum":0, "maximum":1}, + "gl": {"type": "number", "minimum":0, "maximum":1}, + "mr": {"type": "number", "minimum":0, "maximum":1}, + "pa": {"type": "number", "minimum":0, "maximum":1}, + "si": {"type": "number", "minimum":0, "maximum":1}, + "km": {"type": "number", "minimum":0, "maximum":1}, + "sn": {"type": "number", "minimum":0, "maximum":1}, + "yo": {"type": "number", "minimum":0, "maximum":1}, + "so": {"type": "number", "minimum":0, "maximum":1}, + "af": {"type": "number", "minimum":0, "maximum":1}, + "oc": {"type": "number", "minimum":0, "maximum":1}, + "ka": {"type": "number", "minimum":0, "maximum":1}, + "be": {"type": "number", "minimum":0, "maximum":1}, + "tg": {"type": "number", "minimum":0, "maximum":1}, + "sd": {"type": "number", "minimum":0, "maximum":1}, + "gu": {"type": "number", "minimum":0, 
"maximum":1}, + "am": {"type": "number", "minimum":0, "maximum":1}, + "yi": {"type": "number", "minimum":0, "maximum":1}, + "lo": {"type": "number", "minimum":0, "maximum":1}, + "uz": {"type": "number", "minimum":0, "maximum":1}, + "fo": {"type": "number", "minimum":0, "maximum":1}, + "ht": {"type": "number", "minimum":0, "maximum":1}, + "ps": {"type": "number", "minimum":0, "maximum":1}, + "tk": {"type": "number", "minimum":0, "maximum":1}, + "nn": {"type": "number", "minimum":0, "maximum":1}, + "mt": {"type": "number", "minimum":0, "maximum":1}, + "sa": {"type": "number", "minimum":0, "maximum":1}, + "lb": {"type": "number", "minimum":0, "maximum":1}, + "my": {"type": "number", "minimum":0, "maximum":1}, + "bo": {"type": "number", "minimum":0, "maximum":1}, + "tl": {"type": "number", "minimum":0, "maximum":1}, + "mg": {"type": "number", "minimum":0, "maximum":1}, + "as": {"type": "number", "minimum":0, "maximum":1}, + "tt": {"type": "number", "minimum":0, "maximum":1}, + "haw": {"type": "number", "minimum":0, "maximum":1}, + "ln": {"type": "number", "minimum":0, "maximum":1}, + "ha": {"type": "number", "minimum":0, "maximum":1}, + "ba": {"type": "number", "minimum":0, "maximum":1}, + "jw": {"type": "number", "minimum":0, "maximum":1}, + "su": {"type": "number", "minimum":0, "maximum":1} + } + } + } +} diff --git a/tests/run_tests.py b/tests/run_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..0b202b69af065c478fc99607b012ccec40cf5580 --- /dev/null +++ b/tests/run_tests.py @@ -0,0 +1,48 @@ +import sys +import unittest + +from test_transcribe import * +import test_transcribe + +if __name__ == '__main__': + + # Handle several ways of generating expected outputs + if "--long" in sys.argv: + test_transcribe.SKIP_LONG_TEST_IF_CPU = False + sys.argv.remove("--long") + if "--generate" in sys.argv: + test_transcribe.FAIL_IF_REFERENCE_NOT_FOUND = False + sys.argv.remove("--generate") + if "--generate_device" in sys.argv: + 
class TestHelper(unittest.TestCase):
    """
    Base class of the non-regression tests.

    It provides:
    - path helpers relative to the repository root (the parent folder of the
      folder containing this file),
    - a subprocess runner that fails the test on a non-zero exit code,
    - comparison of generated files/folders against stored references, which
      are created when missing (or regenerated when GENERATE_ALL or
      GENERATE_DEVICE_DEPENDENT is set).
    """

    def skipLongTests(self):
        """Return True when long tests must be skipped (flag set and no CUDA device)."""
        return SKIP_LONG_TEST_IF_CPU and not torch.cuda.is_available()

    def setUp(self):
        # Always show full diffs in assertion messages
        self.maxDiff = None
        # Reference files/folders created while running the current test
        self.createdReferences = []

    def tearDown(self):
        """Warn (generation modes) or fail (normal mode) when references were created."""
        generating = (
            GENERATE_ALL or GENERATE_NEW_ONLY
            or not FAIL_IF_REFERENCE_NOT_FOUND or GENERATE_DEVICE_DEPENDENT
        )
        if generating:
            if len(self.createdReferences) > 0:
                print("WARNING: Created references: " +
                      ", ".join(self.createdReferences).replace(self.get_data_path()+"/", ""))
        else:
            self.assertEqual(self.createdReferences, [], "Created references: " +
                             ", ".join(self.createdReferences).replace(self.get_data_path()+"/", ""))

    def get_main_path(self, fn=None, check=False):
        """Return the path of the package folder, or of file `fn` inside it."""
        return self._get_path("whisper_timestamped", fn, check=check)

    def get_output_path(self, fn=None):
        """Return the temporary folder, or a sub-path of it named after `fn` and the extra CLI options."""
        if fn is None:
            return tempfile.gettempdir()
        return os.path.join(tempfile.gettempdir(), fn + self._extra_cmd_options())

    def get_expected_path(self, fn=None, check=False):
        """Return the folder of reference outputs (specific to the extra CLI options), or `fn` inside it."""
        return self._get_path("tests/expected" + self._extra_cmd_options(), fn, check=check)

    def _extra_cmd_options(self):
        """Return a suffix (".<options without dashes>") identifying CMD_OPTIONS, or "" if there is none."""
        s = "".join([f.replace("-","").strip() for f in CMD_OPTIONS])
        if s:
            return "." + s
        return ""

    def get_data_files(self, files=None, excluded_by_default=["apollo11.mp3", "music.mp4", "arabic.mp3", "japanese.mp3", "empty.wav", "words.wav"]):
        """
        Return the sorted list of paths of the input data files.

        files: file names (relative to the data folder). When None, all the files
            of the data folder are taken, except json files and `excluded_by_default`.
        excluded_by_default: file names ignored when `files` is None (never modified here).
        """
        if files is None:
            files = os.listdir(self.get_data_path())
            files = [f for f in files if f not in excluded_by_default and not f.endswith("json")]
            files = sorted(files)
        return [self.get_data_path(fn) for fn in files]

    def get_generated_files(self, input_filename, output_path, extensions):
        """Yield, for each extension, "<output_path>/<basename of input>.<extension>"."""
        for ext in extensions:
            yield os.path.join(output_path, os.path.basename(input_filename) + "." + ext.lstrip("."))

    def main_script(self, pyscript = "transcribe.py", exename = "whisper_timestamped"):
        """Return the path of `pyscript` in the package folder if it exists, otherwise the executable name."""
        main_script = self.get_main_path(pyscript, check=False)
        if not os.path.exists(main_script):
            main_script = exename
        return main_script

    def assertRun(self, cmd):
        """
        Run a command from the temporary folder and check that it exits with code 0.

        cmd: list of arguments, or a string split on whitespace.
            A first argument ending with ".py" is run with the current interpreter.
        Returns the tuple (stdout, stderr), both decoded as UTF-8.
        """
        if isinstance(cmd, str):
            return self.assertRun(cmd.split())
        if cmd[0].endswith(".py"):
            cmd = [sys.executable] + cmd
        print("Running:", " ".join(cmd))
        p = subprocess.Popen(cmd,
            # Only the child runs in the temporary folder: the working directory
            # of the test process is left untouched, even if the launch fails
            cwd=tempfile.gettempdir(),
            # Otherwise ".local" path might be missing
            env=dict(
                os.environ, PYTHONPATH=os.pathsep.join(sys.path)),
            stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        (stdout, stderr) = p.communicate()
        self.assertEqual(p.returncode, 0, msg=stderr.decode("utf-8"))
        return (stdout.decode("utf-8"), stderr.decode("utf-8"))

    def assertNonRegression(self, content, reference, string_is_file=True):
        """
        Check that a file/folder is the same as a reference file/folder.

        content: path of a file or folder, or a dict (dumped as JSON to a temporary
            file), or raw text when `string_is_file` is False (written to a temporary file).
        reference: name of the reference, relative to the folder of expected outputs.
        string_is_file: whether a string `content` is a path (True) or raw text (False).

        The reference is created from `content` when it does not exist, or when
        GENERATE_ALL / GENERATE_DEVICE_DEPENDENT is set and it was not already
        created during the current test.
        Raises ValueError when `content` is neither a dict nor a string.
        """
        if isinstance(content, dict):
            # Make a temporary file
            f = tempfile.NamedTemporaryFile(mode="w", suffix=".json", encoding="utf8", delete=False)
            try:
                with f:
                    json.dump(content, f, indent=2, ensure_ascii=False)
                return self.assertNonRegression(f.name, reference)
            finally:
                # Also remove the temporary file when the comparison fails
                os.remove(f.name)
        elif not isinstance(content, str):
            raise ValueError(f"Invalid content type: {type(content)}")

        if not string_is_file:
            f = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", encoding="utf8", delete=False)
            try:
                with f:
                    f.write(content)
                return self.assertNonRegression(f.name, reference)
            finally:
                os.remove(f.name)

        self.assertTrue(os.path.exists(content), f"Missing file: {content}")
        # NOTE(review): `reference` is still the relative name here (it is resolved just below),
        # so this test looks at the current folder -- presumably it usually falls back on `content`
        is_file = os.path.isfile(reference) if os.path.exists(reference) else os.path.isfile(content)

        reference = self.get_expected_path(
            reference, check=FAIL_IF_REFERENCE_NOT_FOUND)
        if not os.path.exists(reference) or ((GENERATE_ALL or GENERATE_DEVICE_DEPENDENT) and reference not in self.createdReferences):
            dirname = os.path.dirname(reference)
            if not os.path.isdir(dirname):
                os.makedirs(dirname)
            if is_file:
                shutil.copyfile(content, reference)
            else:
                shutil.copytree(content, reference)
            self.createdReferences.append(reference)

        if is_file:
            self.assertTrue(os.path.isfile(content))
            self._check_file_non_regression(content, reference)
        else:
            self.assertTrue(os.path.isdir(content))
            # Every generated file must match its reference (sub-folders included)
            for root, dirs, files in os.walk(content):
                subdir = os.path.relpath(root, content)
                for f in files:
                    f_ref = os.path.normpath(os.path.join(reference, subdir, f))
                    self.assertTrue(os.path.isfile(f_ref),
                                    f"Additional file: {f}")
                    self._check_file_non_regression(
                        os.path.join(root, f), f_ref)
            # Every reference file must have been generated
            for root, dirs, files in os.walk(reference):
                subdir = os.path.relpath(root, reference)
                for f in files:
                    f = os.path.normpath(os.path.join(content, subdir, f))
                    self.assertTrue(os.path.isfile(f), f"Missing file: {f}")

    def get_data_path(self, fn=None, check=True):
        """Return the folder of input data, or the path of `fn` inside it."""
        return self._get_path("tests/data", fn, check)

    def _get_path(self, prefix, fn=None, check=True):
        """
        Return "<repository root>/<prefix>[/<fn>]".

        When `check` is True, the test fails if the path does not exist.
        """
        path = os.path.join(
            os.path.dirname(os.path.dirname(__file__)),
            prefix
        )
        if fn:
            path = os.path.join(path, fn)
        if check:
            self.assertTrue(os.path.exists(path), f"Cannot find {path}")
        return path

    def _check_file_non_regression(self, file, reference):
        """
        Compare a file with its reference.

        JSON files are compared approximately (see `loose`), after normalizing
        the "language" field. Other files must have identical lines.
        """
        # Files are read as UTF-8, which is how this class writes its temporary files
        if file.endswith(".json"):
            with open(file, encoding="utf8") as f:
                content = json.load(f)
            with open(reference, encoding="utf8") as f:
                reference_content = json.load(f)
            if "language" in content and "language" in reference_content:
                content["language"] = self.norm_language(content["language"])
                reference_content["language"] = self.norm_language(reference_content["language"])
            self.assertClose(content, reference_content,
                             msg=f"File {file} does not match reference {reference}")
            return
        with open(file, encoding="utf8") as f:
            content = f.readlines()
        with open(reference, encoding="utf8") as f:
            reference_content = f.readlines()
        self.assertEqual(content, reference_content,
                         msg=f"File {file} does not match reference {reference}")

    def assertClose(self, obj1, obj2, msg=None):
        """Check that two objects are equal once their floats are rounded (see `loose`)."""
        return self.assertEqual(self.loose(obj1), self.loose(obj2), msg=msg)

    def loose(self, obj):
        """
        Return an approximative value of an object.

        Floats are rounded to one decimal (with -0.0 mapped to 0.0), recursively
        inside lists, tuples, sets and dict values. Other objects are returned as is.
        """
        if isinstance(obj, list):
            return [self.loose(a) for a in obj]
        if isinstance(obj, float):
            f = round(obj, 1)
            return 0.0 if f == -0.0 else f
        if isinstance(obj, dict):
            return {k: self.loose(v) for k, v in obj.items()}
        if isinstance(obj, tuple):
            return tuple(self.loose(list(obj)))
        if isinstance(obj, set):
            return {self.loose(a) for a in obj}
        return obj

    def get_audio_duration(self, audio_file):
        """Return the duration of an audio file, in seconds."""
        # Get the duration in sec *without introducing additional dependencies*
        import whisper
        return len(whisper.load_audio(audio_file)) / whisper.audio.SAMPLE_RATE

    def get_device_str(self):
        """Return "cuda" if a CUDA device is available, "cpu" otherwise."""
        return "cpu" if not torch.cuda.is_available() else "cuda"

    def norm_language(self, language):
        """Map known language names (case-insensitive) to their code; return other values unchanged."""
        # Cheap custom stuff to avoid importing everything
        return {
            "japanese": "ja",
        }.get(language.lower(), language)
class TestHelperCli(TestHelper):
    """Helper to run the command line on data files and compare its outputs to references."""

    # Content of tests/json_schema.json, loaded on first use and shared by all the test cases
    json_schema = None

    def _test_cli_(self, opts, name, files=None, extensions=["words.json"], prefix=None, one_per_call=True, device_specific=None):
        """
        Test command line
            opts: list of options
            name: name of the test
            files: list of files to process
            extensions: list of extensions to check, or None to test the stdout
            prefix: prefix to add to the reference files
            one_per_call: if True, each file is processed separately, otherwise all files are processed by a single process
            device_specific: whether references depend on the device (None to decide from the audio duration and the test name)
        """

        opts = opts + CMD_OPTIONS

        output_dir = self.get_output_path(name)

        input_filenames = self.get_data_files(files)

        # Extensions of the reference files (the stdout is stored as "<input>.stdout")
        checked_extensions = ["stdout"] if extensions is None else extensions

        # When all the files are processed by a single process, it must run once,
        # for the first file that is not skipped (which is not necessarily the first file)
        batch_done = False

        for input_filename in input_filenames:

            # Butterfly effect: Results are different depending on the device for long files
            duration = self.get_audio_duration(input_filename)
            if device_specific is None:
                device_dependent = duration > 60 or (duration > 30 and "tiny_fr" in name) or ("empty" in input_filename and "medium_auto" in name)
            else:
                device_dependent = device_specific
            name_ = name
            if device_dependent and self.get_device_str() != "cuda":
                name_ += f".{self.get_device_str()}"

            def ref_name(output_filename):
                return name_ + "/" + (f"{prefix}_" if prefix else "") + os.path.basename(output_filename)
            generic_name = ref_name(input_filename + ".*")

            if GENERATE_DEVICE_DEPENDENT and not device_dependent:
                print("Skipping non-regression test", generic_name)
                continue

            if GENERATE_NEW_ONLY and all(
                os.path.exists(self.get_expected_path(ref_name(output_filename)))
                for output_filename in self.get_generated_files(input_filename, output_dir, extensions=checked_extensions)
            ):
                print("Skipping non-regression test", generic_name)
                continue

            print("Running non-regression test", generic_name)

            if one_per_call or not batch_done:
                if one_per_call:
                    (stdout, stderr) = self.assertRun([self.main_script(), input_filename, "--output_dir", output_dir, *opts])
                else:
                    (stdout, stderr) = self.assertRun([self.main_script(), *input_filenames, "--output_dir", output_dir, *opts])
                    batch_done = True
                print(stdout)
                print(stderr)

            # Validate the structure of the JSON output, when there is one
            output_json = next(self.get_generated_files(input_filename, output_dir, extensions=["words.json"]))
            if os.path.isfile(output_json):
                self.check_json(output_json)

            if extensions is None:
                output_filename = next(self.get_generated_files(input_filename, output_dir, extensions=["stdout"]))
                self.assertNonRegression(stdout, ref_name(output_filename), string_is_file=False)
            else:
                for output_filename in self.get_generated_files(input_filename, output_dir, extensions=extensions):
                    self.assertNonRegression(output_filename, ref_name(output_filename))


        shutil.rmtree(output_dir, ignore_errors=True)

    def check_json(self, json_file):
        """Validate a JSON file against the schema stored in json_schema.json, next to this file."""
        with open(json_file) as f:
            content = json.load(f)

        if self.json_schema is None:
            schema_file = os.path.join(os.path.dirname(__file__), "json_schema.json")
            self.assertTrue(os.path.isfile(schema_file), msg=f"Schema file {schema_file} not found")
            with open(schema_file) as f:
                # Cache on the class: unittest creates a new instance for each test
                TestHelperCli.json_schema = json.load(f)

        jsonschema.validate(instance=content, schema=self.json_schema)
class TestTranscribeCornerCases(TestHelperCli):
    """
    Non-regression tests on inputs and options that caused issues.

    Tests that cannot run (long test without GPU, optional data file missing)
    are reported as skipped instead of silently passing.
    """

    # Reason reported when self.skipLongTests() is True
    _LONG_TEST_REASON = "long test skipped (no CUDA device available, use --long to force)"

    def test_stucked_lm(self):
        if self.skipLongTests():
            self.skipTest(self._LONG_TEST_REASON)

        self._test_cli_(
            ["--model", "small", "--language", "en", "--efficient"],
            "corner_cases",
            files=["apollo11.mp3"],
            prefix="stucked_lm",
        )

    def test_punctuation_only(self):

        # When there is only a punctuation detected in a segment, it could cause issue #24
        self._test_cli_(
            ["--model", "medium.en", "--efficient", "--punctuations", "False"],
            "corner_cases",
            files=["empty.wav"],
            prefix="issue24",
        )

    def test_temperature(self):

        self._test_cli_(
            ["--model", "small", "--language", "English",
                "--condition", "False", "--temperature", "0.1", "--efficient"],
            "corner_cases",
            files=["apollo11.mp3"],
            prefix="random.nocond",
        )

        # The first configuration above always runs; only the second one is a long test
        if self.skipLongTests():
            self.skipTest(self._LONG_TEST_REASON)

        self._test_cli_(
            ["--model", "small", "--language", "en", "--temperature", "0.2", "--efficient"],
            "corner_cases",
            files=["apollo11.mp3"],
            prefix="random",
        )

    def test_not_conditioned(self):

        # music.mp4 is optional data
        if not os.path.exists(self.get_data_path("music.mp4", check=False)):
            self.skipTest("data file music.mp4 not found")
        if self.skipLongTests():
            self.skipTest(self._LONG_TEST_REASON)

        self._test_cli_(
            ["--model", "medium", "--language", "en", "--condition", "False", "--efficient"],
            "corner_cases",
            files=["music.mp4"],
            prefix="nocond",
        )

        self._test_cli_(
            ["--model", "medium", "--language", "en",
                "--condition", "False", "--temperature", "0.4", "--efficient"],
            "corner_cases",
            files=["music.mp4"],
            prefix="nocond.random",
        )

    def test_large(self):
        if self.skipLongTests():
            self.skipTest(self._LONG_TEST_REASON)

        self._test_cli_(
            ["--model", "large-v2", "--language", "en",
                "--condition", "False", "--temperature", "0.4", "--efficient"],
            "corner_cases",
            files=["apollo11.mp3"],
            prefix="large",
        )

        # arabic.mp3 is optional data: this part only runs when the file is there
        if os.path.exists(self.get_data_path("arabic.mp3", check=False)):
            self._test_cli_(
                ["--model", "large-v2", "--language", "Arabic", "--efficient"],
                "corner_cases",
                files=["arabic.mp3"]
            )

    def test_gloria(self):

        # All combinations of model and decoding strategy on the same file
        for model in ["medium", "large-v2"]:
            for dec in ["efficient", "accurate"]:
                self._test_cli_(
                    ["--model", model, "--language", "en", "--" + dec],
                    "corner_cases",
                    files=["gloria.mp3"],
                    prefix=model + "." + dec,
                )
class TestTranscribeUnspacedLanguage(TestHelperCli):
    """Non-regression tests on a language written without spaces between words."""

    def test_japanese(self):
        # (prefix of the reference files, command line options), run in this order.
        # All the references are device specific.
        configurations = (
            (None, ["--model", "tiny", "--efficient"]),
            ("jp", ["--model", "tiny", "--language", "Japanese", "--efficient"]),
            ("accurate", ["--model", "tiny", "--accurate"]),
            ("accurate_jp", ["--model", "tiny", "--language", "Japanese", "--accurate"]),
        )

        for ref_prefix, cli_options in configurations:
            self._test_cli_(
                cli_options,
                "tiny_auto",
                files=["japanese.mp3"],
                prefix=ref_prefix,
                device_specific=True,
            )
class TestMakeSubtitles(TestHelper):
    """Non-regression tests of the script that splits subtitles to a maximum length."""

    def test_make_subtitles(self):

        script = self.main_script("make_subtitles.py", "whisper_timestamped_make_subtitles")

        json_inputs = [
            self.get_data_path("smartphone.mp3.words.json"),
            self.get_data_path("no_punctuations.mp3.words.json", check=True),
            self.get_data_path("yes_punctuations.mp3.words.json", check=True),
        ]

        for index, json_input in enumerate(json_inputs):
            stem = os.path.basename(json_input).replace(".words.json", "")
            for max_length in (6, 20, 50):
                max_length_opts = ["--max_length", str(max_length)]

                # Folder mode: the first input is given through the whole data folder
                output_dir = self.get_output_path()
                if index > 0:
                    source = json_input
                else:
                    source = self.get_data_path()
                self.assertRun([script, source, output_dir] + max_length_opts)

                for subtitle_format in ("vtt", "srt"):
                    output_file = os.path.join(output_dir, f"{stem}.{subtitle_format}")
                    self.assertTrue(os.path.isfile(output_file), msg=f"File {output_file} not found")
                    expected_file = f"split_subtitles/{stem.split('_')[-1]}_{max_length}.{subtitle_format}"
                    self.assertNonRegression(output_file, expected_file)

                    # File mode: generating that single file again must give the same result
                    os.remove(output_file)
                    self.assertRun([script, json_input, output_file] + max_length_opts)
                    self.assertTrue(os.path.isfile(output_file), msg=f"File {output_file} not found")
                    self.assertNonRegression(output_file, expected_file)
def test_python_import(self):
    """Check the version, and the transcription from Python on some short files."""

    # Fall back on the sources of the repository when the package is not installed
    try:
        import whisper_timestamped
    except ModuleNotFoundError:
        repository_root = os.path.dirname(os.path.dirname(__file__))
        sys.path.append(os.path.realpath(repository_root))
        import whisper_timestamped

    # Test version
    version = whisper_timestamped.__version__
    self.assertTrue(isinstance(version, str))

    cli_stdout, _ = self.assertRun([self.main_script(), "-v"])
    self.assertEqual(cli_stdout.strip(), version)

    model = whisper_timestamped.load_model("tiny")

    # Check processing of different files:
    # first with language detection, then with French imposed
    short_files = ("bonjour.wav", "laugh1.mp3", "laugh2.mp3")
    settings = (
        ("tiny_auto", {}),
        ("tiny_fr", {"language": "fr"}),
    )
    for reference_folder, transcribe_options in settings:
        for filename in short_files:
            res = whisper_timestamped.transcribe(
                model, self.get_data_path(filename), **transcribe_options)
            if not self._can_generate_reference():
                continue
            self.assertNonRegression(res, f"{reference_folder}/{filename}.words.json")
import split_tokens_on_spaces + + tokenizer = whisper.tokenizer.get_tokenizer(True, language=None) + + # 220 means space + tokens = [50364, 220, 6455, 11, 2232, 11, 286, 2041, 11, 2232, 11, 8660, + 291, 808, 493, 220, 365, 11, 220, 445, 718, 505, 458, 13, 220, 50714] + + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', 'So,', 'uh,', 'I', 'guess,', 'uh,', 'wherever', 'you', 'come', 'up', 'with,', 'just', 'let', 'us', 'know.', '<|7.00|>'], + [['<|0.00|>'], + [' ', 'So', ','], + [' uh', ','], + [' I'], + [' guess', ','], + [' uh', ','], + [' wherever'], + [' you'], + [' come'], + [' up'], + [' ', ' with', ','], + [' ', ' just'], + [' let'], + [' us'], + [' know', '.', ' '], + ['<|7.00|>']], + [[50364], + [220, 6455, 11], + [2232, 11], + [286], + [2041, 11], + [2232, 11], + [8660], + [291], + [808], + [493], + [220, 365, 11], + [220, 445], + [718], + [505], + [458, 13, 220], + [50714] + ]) + ) + + tokens = [50366, 314, 6, 11771, 17134, 11, 4666, 11, 1022, 220, 875, 2557, 68, 11, 6992, 631, 269, 6, 377, 220, 409, 7282, 1956, 871, 566, 2707, 394, 1956, 256, 622, 8208, 631, 8208, 871, 517, 7282, 1956, 5977, 7418, 371, 1004, 306, 580, 11, 5977, 12, 9498, 9505, 84, 6, 50416] + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + ( + ['<|0.04|>', "T'façon,", 'nous,', 'sur', 'la', 'touche,', 'parce', 'que', "c'est", 'un', 'sport', 'qui', 'est', 'important', 'qui', 'tue', 'deux', 'que', 'deux', 'est', 'un', 'sport', 'qui', 'peut', 'être', 'violent,', 'peut-être', "qu'", '<|1.04|>'], + [['<|0.04|>'], + [' T', "'", 'fa', 'çon', ','], + [' nous', ','], + [' sur'], + [' ', 'la'], + [' touch', 'e', ','], + [' parce'], + [' que'], + [' c', "'", 'est'], + [' ', 'un'], + [' sport'], + [' qui'], + [' est'], + [' im', 'port', 'ant'], + [' qui'], + [' t', 'ue'], + [' deux'], + [' que'], + [' deux'], + [' est'], + [' un'], + [' sport'], + [' qui'], + [' peut'], + [' être'], + [' v', 'io', 'le', 'nt', ','], + [' peut', '-', 'être'], + [' q', 'u', 
"'"], + ['<|1.04|>']], + [[50366], + [314, 6, 11771, 17134, 11], + [4666, 11], + [1022], + [220, 875], + [2557, 68, 11], + [6992], + [631], + [269, 6, 377], + [220, 409], + [7282], + [1956], + [871], + [566, 2707, 394], + [1956], + [256, 622], + [8208], + [631], + [8208], + [871], + [517], + [7282], + [1956], + [5977], + [7418], + [371, 1004, 306, 580, 11], + [5977, 12, 9498], + [9505, 84, 6], + [50416]] + ) + ) + + tokens = [50364, 220, 220, 6455, 11, 220, 220, 2232, 220, 220, 11, 220, 50714] + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', 'So,', 'uh', ',', '<|7.00|>'], + [['<|0.00|>'], + [' ', ' ', 'So', ','], + [' ', ' ', ' uh'], + [' ', ' ', ',', ' '], + ['<|7.00|>']], + [[50364], [220, 220, 6455, 11], [220, 220, 2232], [220, 220, 11, 220], [50714]] + ) + ) + + # Careful with the double spaces at the end... + tokens = [50364, 220, 220, 6455, 11, 220, 220, 2232, 220, 220, 11, 220, 220, 50714] + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', 'So,', 'uh', ',', '', '<|7.00|>'], + [['<|0.00|>'], + [' ', ' ', 'So', ','], + [' ', ' ', ' uh'], + [' ', ' ', ','], + [' ', ' '], + ['<|7.00|>']], + [[50364], [220, 220, 6455, 11], [220, 220, 2232], [220, 220, 11], [220, 220], [50714]] + ) + ) + + # Tokens that could be removed + tokens = [50364, 6024, 95, 8848, 7649, 8717, 38251, 11703, 3224, 51864] + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', 'الآذان', 'نسمّه', '<|30.00|>'], + [['<|0.00|>'], ['', ' الآ', 'ذ', 'ان'], [' ن', 'سم', 'ّ', 'ه'], ['<|30.00|>']], + [[50364], [6024, 95, 8848, 7649], [8717, 38251, 11703, 3224], [51864]] + ) + ) + + # issue #61 + # Special tokens that are not timestamps + tokens = [50414, 805, 12, 17, 50299, 11, 568, 12, 18, 12, 21, 11, 502, 12, 17, 12, 51464] + # 50299 is "<|te|>" and appears as "" + te = "" + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|1.00|>', f'3-2{te},', '2-3-6,', '1-2-', '<|22.00|>'], + [['<|1.00|>'], 
[' 3', '-', '2', f'{te}', ','], [' 2', '-', '3', '-','6', ','], [' 1', '-', '2', '-'], ['<|22.00|>']], + [[50414], [805, 12, 17, 50299, 11], [568, 12, 18, 12, 21, 11], [502, 12, 17, 12], [51464]]) + ) + + tokenizer = whisper.tokenizer.get_tokenizer(False, language="en") + + # Just a punctuation character + tokens = [50363, 764, 51813] + + _dot = "." if whisperversion < "20230314" else " ." + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', ".", '<|29.00|>'], + [['<|0.00|>'], [_dot], ['<|29.00|>']], + [[50363], [764], [51813]] + ) + ) diff --git a/whisper_timestamped/__init__.py b/whisper_timestamped/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..24fd61eeea212883b618d1fb974d40ab24f07ec0 --- /dev/null +++ b/whisper_timestamped/__init__.py @@ -0,0 +1,10 @@ +from whisper import available_models, _download, _MODELS # defined in __init__.py +from whisper import audio, decoding, model, normalizers, tokenizer, utils +from whisper.audio import load_audio, log_mel_spectrogram, pad_or_trim +from whisper.decoding import DecodingOptions, DecodingResult, decode, detect_language +from whisper.model import Whisper, ModelDimensions + +from .transcribe import transcribe_timestamped +from .transcribe import transcribe_timestamped as transcribe +from .transcribe import load_model +from .transcribe import __version__ \ No newline at end of file diff --git a/whisper_timestamped/make_subtitles.py b/whisper_timestamped/make_subtitles.py new file mode 100644 index 0000000000000000000000000000000000000000..bfc49ceda111efd3ac8065b082276aed26e79977 --- /dev/null +++ b/whisper_timestamped/make_subtitles.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 + +import json +import string + +_punctuation = "".join(c for c in string.punctuation if c not in ["-", "'"]) + "。,!?:”、…" + +def split_long_segments(segments, max_length, use_space = True): + new_segments = [] + for segment in segments: + text = segment["text"] + if len(text) <= 
max_length: + new_segments.append(segment) + else: + meta_words = segment["words"] + # Note: we do this in case punctuation were removed from words + if use_space: + # Split text around spaces and punctuations (keeping punctuations) + words = text.split() + else: + words = [w["text"] for w in meta_words] + if len(words) != len(meta_words): + new_words = [w["text"] for w in meta_words] + print(f"WARNING: {' '.join(words)} != {' '.join(new_words)}") + words = new_words + current_text = "" + current_start = segment["start"] + current_best_idx = None + current_best_end = None + current_best_next_start = None + for i, (word, meta) in enumerate(zip(words, meta_words)): + current_text_before = current_text + if current_text and use_space: + current_text += " " + current_text += word + + if len(current_text) > max_length and len(current_text_before): + start = current_start + if current_best_idx is not None: + text = current_text[:current_best_idx] + end = current_best_end + current_text = current_text[current_best_idx+1:] + current_start = current_best_next_start + else: + text = current_text_before + end = meta_words[i-1]["end"] + current_text = word + current_start = meta["start"] + + current_best_idx = None + current_best_end = None + current_best_next_start = None + + new_segments.append({"text": text, "start": start, "end": end}) + + # Try to cut after punctuation + if current_text and current_text[-1] in _punctuation: + current_best_idx = len(current_text) + current_best_end = meta["end"] + current_best_next_start = meta_words[i+1]["start"] if i+1 < len(meta_words) else None + + if len(current_text): + new_segments.append({"text": current_text, "start": current_start, "end": segment["end"]}) + + return new_segments + +def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = '.'): + assert seconds >= 0, "non-negative timestamp expected" + milliseconds = round(seconds * 1000.0) + + hours = milliseconds // 3_600_000 + milliseconds 
-= hours * 3_600_000 + + minutes = milliseconds // 60_000 + milliseconds -= minutes * 60_000 + + seconds = milliseconds // 1_000 + milliseconds -= seconds * 1_000 + + hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else "" + return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}" + +def write_vtt(result, file): + print("WEBVTT\n", file=file) + for segment in result: + print( + f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n" + f"{segment['text'].strip().replace('-->', '->')}\n", + file=file, + flush=True, + ) + +def write_srt(result, file): + for i, segment in enumerate(result, start=1): + # write srt lines + print( + f"{i}\n" + f"{format_timestamp(segment['start'], always_include_hours=True, decimal_marker=',')} --> " + f"{format_timestamp(segment['end'], always_include_hours=True, decimal_marker=',')}\n" + f"{segment['text'].strip().replace('-->', '->')}\n", + file=file, + flush=True, + ) + +def cli(): + + import os + import argparse + + supported_formats = ["srt", "vtt"] + + parser = argparse.ArgumentParser( + description='Convert .word.json transcription files (output of whisper_timestamped) to srt or vtt, being able to cut long segments', + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument('input', type=str, help='Input json file, or input folder') + parser.add_argument('output', type=str, help='Output srt or vtt file, or output folder') + parser.add_argument('--max_length', default=200, help='Maximum length of a segment in characters', type=int) + parser.add_argument('--format', type=str, default="all", help='Output format (if the output is a folder, i.e. 
not a file with an explicit extension)', choices= supported_formats + ["all"]) + args = parser.parse_args() + + if os.path.isdir(args.input) or not max([args.output.endswith(e) for e in supported_formats]): + input_files = [f for f in os.listdir(args.input) if f.endswith(".words.json")] if os.path.isdir(args.input) else [os.path.basename(args.input)] + extensions = [args.format] if args.format != "all" else ["srt", "vtt"] + output_files = [[os.path.join(args.output, f[:-11] + "." + e) for e in extensions] for f in input_files] + if os.path.isdir(args.input): + input_files = [os.path.join(args.input, f) for f in input_files] + else: + input_files = [args.input] + if not os.path.isdir(args.output): + os.makedirs(args.output) + else: + input_files = [args.input] + output_files = [[args.output]] + if not os.path.isdir(os.path.dirname(args.output)): + os.makedirs(os.path.dirname(args.output)) + + for fn, outputs in zip(input_files, output_files): + with open(fn, "r", encoding="utf-8") as f: + transcript = json.load(f) + segments = transcript["segments"] + if args.max_length: + language = transcript["language"] + use_space = language not in ["zh", "ja", "th", "lo", "my"] + segments = split_long_segments(segments, args.max_length, use_space=use_space) + for output in outputs: + if output.endswith(".srt"): + with open(output, "w", encoding="utf-8") as f: + write_srt(segments, file=f) + elif output.endswith(".vtt"): + with open(output, "w", encoding="utf-8") as f: + write_vtt(segments, file=f) + else: + raise RuntimeError(f"Unknown output format for {output}") + +if __name__ == "__main__": + cli() \ No newline at end of file diff --git a/whisper_timestamped/transcribe.py b/whisper_timestamped/transcribe.py new file mode 100644 index 0000000000000000000000000000000000000000..9a444c29c703e3fec9d91ea6b1ae21ecaa2a54d8 --- /dev/null +++ b/whisper_timestamped/transcribe.py @@ -0,0 +1,3169 @@ +#!/usr/bin/env python3 + +__author__ = "Jérôme Louradour" +__credits__ = ["Jérôme 
Louradour"] +__license__ = "GPLv3" +__version__ = "1.15.4" + +# Set some environment variables +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' # Remove warning "This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)..." +os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' # GPU in the right order + +# openai-whisper and pytorch +import whisper +import torch +import torch.nn.functional as F + +from importlib.util import find_spec +if find_spec("intel_extension_for_pytorch") is not None: + try: + import intel_extension_for_pytorch + except ImportError: + pass + +# For alignment +import numpy as np +import dtw +# from scipy.signal import medfilt as median_filter +from scipy.ndimage import median_filter # faster owing to https://github.com/openai/whisper/commit/f0083e7eb20d032390e42f6f6039947fa8669c93 +from scipy.signal import find_peaks + +# Additional +import string +import csv +import sys +import gzip, base64 +import copy +import re +import shutil +import json + +# Constant variables +from whisper.utils import format_timestamp +from whisper.audio import N_FRAMES, HOP_LENGTH, SAMPLE_RATE # 3000, 160, 16000 +AUDIO_SAMPLES_PER_TOKEN = HOP_LENGTH * 2 # 320 +AUDIO_TIME_PER_TOKEN = AUDIO_SAMPLES_PER_TOKEN / SAMPLE_RATE # 0.02 (sec) +SEGMENT_DURATION = N_FRAMES * HOP_LENGTH / SAMPLE_RATE # 30.0 (sec) + +# Logs +import logging +logger = logging.getLogger("whisper_timestamped") + +DEFAULT_BACKEND = "openai-whisper" # "transformers" +USE_EFFICIENT_BY_DEFAULT = True +TRUST_WHISPER_TIMESTAMP_BY_DEFAULT = True +DISFLUENCY_MARK = "[*]" + +try: + whisper_version = whisper.__version__ +except NameError: + whisper_version = "" +WHIPSER_GE_20230306 = whisper_version >= "20230306" +WHIPSER_GE_20230308 = whisper_version >= "20230308" + +def transcribe_timestamped( + # Main Whisper options + model, + audio, + language=None, + task="transcribe", + + # Additional options for word alignment + remove_punctuation_from_words=False, + compute_word_confidence=True, 
+ include_punctuation_in_confidence=False, + refine_whisper_precision=0.5, + min_word_duration=0.02, # Was 0.04 before 1.11 + plot_word_alignment=False, + word_alignment_most_top_layers=None, # Was 6 before 1.9 + remove_empty_words=False, + use_backend_timestamps=False, + + # Reproducibility + seed=1234, + + vad=False, + detect_disfluencies=False, + trust_whisper_timestamps=TRUST_WHISPER_TIMESTAMP_BY_DEFAULT, + naive_approach=False, + + # Other Whisper options + temperature=0.0 if USE_EFFICIENT_BY_DEFAULT else (0.0, 0.2, 0.4, 0.6, 0.8, 1.0), + best_of=None, + beam_size=None, + patience=None, + length_penalty=None, + compression_ratio_threshold=2.4, + logprob_threshold=-1.0, + no_speech_threshold=0.6, + fp16=None, + condition_on_previous_text=True, + initial_prompt=None, + suppress_tokens="-1", + sample_len=None, + verbose=False, +): + """ + Transcribe an audio file using Whisper + + Parameters + ---------- + model: Whisper + The Whisper model instance. + + audio: Union[str, np.ndarray, torch.Tensor] + The path to the audio file to open, or the audio waveform in 16kHz. + + language: str + The language to use for the transcription. If None, the language is detected automatically. + + task: str + The task to perform: either "transcribe" or "translate". + + remove_punctuation_from_words: bool + If False, words will be glued with the next punctuation mark (if any). + If True, there will be no punctuation mark in the `words[:]["text"]` list. + It only affects these strings; This has no influence on the computation of the word confidence, whatever the value of `include_punctuation_in_confidence` is. + + include_punctuation_in_confidence: bool + Whether to include proba of punctuation in the computation of the (previous) word confidence. + + compute_word_confidence: bool + Whether to compute word confidence. + If True, a finer confidence for each segment will be computed as well. 
+ + vad: bool or str in ["silero", "silero:3.1", "auditok"] or list of start/end timestamp pairs corresponding to speech (e.g. [(0.0, 3.50), (32.43, 36.43)]) + Whether to perform voice activity detection (VAD) on the audio file, to remove silent parts before transcribing with the Whisper model. + This should decrease hallucinations from the Whisper model. + When set to True, the default VAD algorithm is used (silero). + When set to a string, the corresponding VAD algorithm is used (silero, silero:3.1 or auditok). + Note that the library for the corresponding VAD algorithm must be installed. + + detect_disfluencies: bool + Whether to detect disfluencies (i.e. hesitations, filler words, repetitions, corrections, etc.) that the Whisper model might have omitted in the transcription. + This should make the word timestamp prediction more accurate. + Probable disfluencies will be marked as special words "[*]". + + trust_whisper_timestamps: bool + Whether to rely on Whisper's timestamps to get an approximate first estimate of segment positions (up to refine_whisper_precision). + + refine_whisper_precision: float + How much Whisper segment positions can be refined, in seconds. Must be a multiple of 0.02. + + min_word_duration: float + Minimum duration of a word, in seconds. If a word is shorter than this, timestamps will be adjusted. + + plot_word_alignment: bool + Whether to plot the word alignment for each segment. matplotlib must be installed to use this option. + + remove_empty_words: bool + Whether to remove words with no duration occurring at the end of segments (probable Whisper hallucinations). + + use_backend_timestamps: bool + Whether to use word timestamps provided by the backend (openai-whisper or transformers), instead of the ones computed by more complex heuristics of whisper-timestamped. + + seed: int + Random seed to use for temperature sampling, for the sake of reproducibility. + Choose None for unpredictable randomness. 
+ + naive_approach: bool + Force the naive approach that consists in decoding the audio file twice, once to get the transcription and once with the decoded tokens to get the alignment. + Note that this approach is used anyway when beam_size is not None, when the temperature is a list with more than one element, when best_of is greater than 1 with a positive temperature, when use_backend_timestamps is True, or when the model is a transformers-backend model. + + temperature: float or list/tuple of floats + Temperature for sampling. A list or tuple with more than one element enables temperature fallback. + + compression_ratio_threshold: float + If the gzip compression ratio is above this value, treat as failed. + + logprob_threshold: float + If the average log probability over sampled tokens is below this value, treat as failed. + + no_speech_threshold: float + If the no_speech probability is higher than this value AND the average log probability + over sampled tokens is below `logprob_threshold`, consider the segment as silent. + + condition_on_previous_text: bool + If True, the previous output of the model is provided as a prompt for the next window; + disabling may make the text inconsistent across windows, but the model becomes less prone to + getting stuck in a failure loop, such as repetition looping or timestamps going out of sync. + + initial_prompt: str + Optional text to provide as a prompt for the first window. + + suppress_tokens: str + Comma-separated list of token ids to suppress during sampling; + '-1' will suppress most special characters except common punctuation. + + verbose: bool + Whether to display the text being decoded to the console. If True, displays all the details. + If False, displays minimal details. If None, does not display anything. + + Returns + ------- + A dictionary containing the resulting text ("text") and segment-level details ("segments"), and + the spoken language ("language"), which is detected when `language` is None. 
+ """ + + if seed is not None: + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + # Check input options + assert refine_whisper_precision >= 0 and refine_whisper_precision / AUDIO_TIME_PER_TOKEN == round(refine_whisper_precision / AUDIO_TIME_PER_TOKEN), f"refine_whisper_precision must be a positive multiple of {AUDIO_TIME_PER_TOKEN}" + refine_whisper_precision_nframes = round(refine_whisper_precision / AUDIO_TIME_PER_TOKEN) + assert min_word_duration >= 0, f"min_word_duration must be a positive number" + assert word_alignment_most_top_layers is None or word_alignment_most_top_layers > 0, f"word_alignment_most_top_layers must be a strictly positive number" + + if isinstance(temperature, (list, tuple)) and len(temperature) == 1: + temperature = temperature[0] + if isinstance(temperature, (list, tuple)): # temperature fallback + naive_approach = True + elif temperature > 0 and best_of is not None and best_of > 1: # random sampling + naive_approach = True + if beam_size is not None: # beam-search + naive_approach = True + + # TODO: check if efficient approach is possible with transformers backend + # (careful: decoding heuristics are completely different from the ones used in openai-whisper) + if is_transformer_model(model) or use_backend_timestamps: + naive_approach = True + + # Input options + vad = check_vad_method(vad) + if isinstance(model, str): + model = load_model(model) + if fp16 is None: + fp16 = model.device != torch.device("cpu") + + # Safety check + input_stride = N_FRAMES // model.dims.n_audio_ctx + time_precision = input_stride * HOP_LENGTH / SAMPLE_RATE + assert time_precision == AUDIO_TIME_PER_TOKEN + + alignment_heads = get_alignment_heads(model) if word_alignment_most_top_layers is None else None + if alignment_heads is None and word_alignment_most_top_layers is None: + word_alignment_most_top_layers = 6 + + alignment_options = dict( + remove_punctuation_from_words=remove_punctuation_from_words, + 
compute_word_confidence=compute_word_confidence, + include_punctuation_in_confidence=include_punctuation_in_confidence, + detect_disfluencies=detect_disfluencies, + refine_whisper_precision_nframes=refine_whisper_precision_nframes, + plot_word_alignment=plot_word_alignment, + word_alignment_most_top_layers=word_alignment_most_top_layers, + alignment_heads=alignment_heads, + ) + whisper_options = dict( + language=language, + task=task, + fp16=fp16, + temperature=temperature, + best_of=best_of, + beam_size=beam_size, + patience=patience, + length_penalty=length_penalty, + condition_on_previous_text=condition_on_previous_text, + initial_prompt=initial_prompt, + suppress_tokens=suppress_tokens, + sample_len=sample_len, + verbose=verbose if (not vad or verbose is not True) else False, + ) + other_options = dict( + no_speech_threshold=no_speech_threshold, + logprob_threshold=logprob_threshold, + compression_ratio_threshold=compression_ratio_threshold, + ) + + if vad is not None: + audio = get_audio_tensor(audio) + audio, vad_segments, convert_timestamps = remove_non_speech(audio, method=vad, sample_rate=SAMPLE_RATE, plot=plot_word_alignment, avoid_empty_speech=True) + else: + vad_segments = None + + global num_alignment_for_plot + num_alignment_for_plot = 0 + + if naive_approach: + (transcription, words) = _transcribe_timestamped_naive(model, audio, + min_word_duration=0.0, # Was 0.04 before 1.11 + trust_whisper_timestamps=trust_whisper_timestamps, + use_backend_timestamps=use_backend_timestamps, + **alignment_options, **whisper_options, **other_options) + else: + (transcription, words) = _transcribe_timestamped_efficient(model, audio, + trust_whisper_timestamps=trust_whisper_timestamps, + **alignment_options, **whisper_options, **other_options) + if remove_empty_words: + # Remove words with empty duration happening at the end of segments, to remove some hallucinations + transcription, words = remove_last_null_duration_words(transcription, words, recompute_text=True) + + 
# Refine word positions + ensure_increasing_positions(words, min_duration=min_word_duration if trust_whisper_timestamps else 0) + + # Combine words and segments + whisper_segments = transcription["segments"] + for word in words: + if verbose and not naive_approach and not vad: + print_timestamped(word) + word.pop("tokens", None) + word.pop("tokens_indices", None) + if "avg_logprob_reliable" in word: + word.pop("avg_logprob_reliable") + idx_segment = word.pop("idx_segment") + assert idx_segment < len(whisper_segments), f"Fatal error: Got unexpected segment index {idx_segment} >= {len(whisper_segments)}" + segment = whisper_segments[idx_segment] + if "words" in segment: + segment["words"].append(word) + else: + segment["words"] = [word] + if refine_whisper_precision: + segment["start"] = word["start"] + if refine_whisper_precision: + segment["end"] = word["end"] + + if vad: + # Recompute timestamps to match the original audio + for segment in whisper_segments: + for word in segment.get("words", []): + word["start"], word["end"] = convert_timestamps(word["start"], word["end"]) + if verbose: + print_timestamped(word) + if refine_whisper_precision and len(segment.get("words", [])): + segment["start"] = segment["words"][0]["start"] + segment["end"] = segment["words"][-1]["end"] + else: + segment["start"], segment["end"] = convert_timestamps(segment["start"], segment["end"]) + + if vad_segments is not None: + transcription["speech_activity"] = [{"start":s, "end":e} for (s,e) in vad_segments] + + return transcription + +def _transcribe_timestamped_efficient( + model, + audio, + remove_punctuation_from_words, + compute_word_confidence, + include_punctuation_in_confidence, + refine_whisper_precision_nframes, + alignment_heads, + plot_word_alignment, + word_alignment_most_top_layers, + detect_disfluencies, + trust_whisper_timestamps, + use_timestamps_for_alignment = True, + # Whisper specific options + **whisper_options, +): + + # Get options + sample_len = 
whisper_options["sample_len"] + temperature = whisper_options["temperature"] + no_speech_threshold = whisper_options["no_speech_threshold"] + logprob_threshold = whisper_options["logprob_threshold"] + verbose = whisper_options["verbose"] + # Note: "on-the-fly" verbose is not implementable in the current state (we don't know the absolute position of the current chunk). See issue #18 + verbose_bugged = False + whisper_options["verbose"] = None if whisper_options["verbose"] is True else whisper_options["verbose"] # We will print intermediate results ourselves + + logit_filters = get_logit_filters(model, whisper_options) + language = whisper_options["language"] + tokenizer = get_tokenizer(model, task=whisper_options["task"], language=language) + + max_sample_len = sample_len or model.dims.n_text_ctx // 2 + n_ctx = model.dims.n_text_ctx + + debug = logger.getEffectiveLevel() >= logging.DEBUG + + word_alignment_most_top_layers = float("inf") if word_alignment_most_top_layers is None else word_alignment_most_top_layers + + # The main outcome + timestamped_word_segments = [] # list of timestamped word segments that have been collected so far + # Main variables to be accumulated + segment_tokens = [[]] # list of lists of token indices that have been collected so far (one list per segment) + segment_attweights = [[] for _ in range(min(word_alignment_most_top_layers, len(model.decoder.blocks)))] + # attention weights on the last segments + segment_avglogprobs = [] # average log probability for each segment (actually of the corresponding chunk, as computed by whisper) + segment_logprobs = [] # token log probabilities for each segment + # Variables related to options that can skip some segments + sot_index = None # index of the SOT token in the current set of processed tokens + no_speech_prob = None # no speech probability for the current 30 sec chunk + chunk_logprobs = [] # log probabilities for the current 30 sec chunk + chunk_tokens = [] # tokens for the current 30 sec chunk 
(list of Torch tensors) + chunk_tokens_nosot = [] # tokens for the current 30 sec chunk, without the SOT tokens (list of indices) + last_chunk_token = None # last token of the current chunk, that may be needed for corner cases + last_token_fallback = None # last token to use as a fallback if the model gets stuck + has_started = False # whether we have started decoding + mfcc = None # MFCC features for the current 30 sec chunk + new_mfcc = None # + num_inference_steps = 0 # number of inference steps performed so far (for debugging only) + language_probs = None # language detection probabilities + + def is_sot(curr_tokens): + return curr_tokens is None or len(curr_tokens) > 1 or curr_tokens[0] == tokenizer.sot + + def has_reached_decoding_limit(): + n = len(chunk_tokens_nosot) + 1 + m = n + (len(chunk_tokens[0]) if len(chunk_tokens) > 0 else 0) + return n + 1 >= max_sample_len or m > n_ctx + + def reset(add_segment, keep_last_token=True): + """ Reset the list of tokens for the current speech segment, and corresponding cross-attention weights """ + nonlocal segment_tokens, segment_attweights + if add_segment: + if keep_last_token: + segment_tokens.append([segment_tokens[-1][-1]]) + segment_attweights = [w[-1:] for w in segment_attweights] + else: + segment_tokens.append([]) + segment_attweights = [[] for w in segment_attweights] + segment_tokens[-2].pop(0) + elif len(segment_tokens[-1]) > 0: + if debug: + logger.debug(f"Reset last segment: {tokenizer.decode_with_timestamps(segment_tokens[-1])}") + segment_tokens[-1] = [] + segment_attweights = [[] for w in segment_attweights] + + saw_consecutive_timestamps = False + def must_flush_segment(curr_tokens): + """ Return whether or not the previously collected tokens must be used to add a new speech segment """ + nonlocal segment_tokens, saw_consecutive_timestamps, chunk_tokens_nosot + + if not is_sot(curr_tokens): + is_timestamp = curr_tokens[0] >= tokenizer.timestamp_begin + is_previous_timestamp = segment_tokens[-1][-1] 
>= tokenizer.timestamp_begin if len(segment_tokens[-1]) > 0 else False + consecutive_timestamps = is_timestamp and is_previous_timestamp + if consecutive_timestamps: + saw_consecutive_timestamps = True + return consecutive_timestamps + else: # Several tokens as a prompt or must flush last segments + + must_flush = len(segment_tokens[-1]) > 1 and not saw_consecutive_timestamps + if not must_flush and WHIPSER_GE_20230306: # If the last token is a timestamp, the last segment is used + if last_chunk_token is None: + must_flush = (len(segment_tokens[-1]) > 2 and segment_tokens[-1][-1] >= tokenizer.timestamp_begin) + else: + must_flush = (last_chunk_token >= tokenizer.timestamp_begin) + if not must_flush and trust_whisper_timestamps: + # Discard the end of the last transcription + reset(False) + saw_consecutive_timestamps = False + return must_flush + + index_begin_30sec_chunck = 0 + def get_index_begin_30sec_chunck(curr_tokens): + nonlocal index_begin_30sec_chunck, has_started + + if is_sot(curr_tokens) and has_started: + if trust_whisper_timestamps: + res = index_begin_30sec_chunck + index_begin_30sec_chunck = len(segment_tokens)-1 + else: + res = len(segment_tokens)-1 + return res + + def align_last_segment(curr_tokens=None): + nonlocal segment_tokens, segment_attweights, timestamped_word_segments, has_started, no_speech_prob, chunk_tokens, chunk_tokens_nosot, chunk_logprobs, mfcc, new_mfcc, logit_filters, index_begin_30sec_chunck, last_token_fallback, num_inference_steps + + if debug and trust_whisper_timestamps: + logger.debug(f"Add segment {len(timestamped_word_segments)+1} at step {num_inference_steps}:\n\t{tokenizer.decode_with_timestamps(segment_tokens[-1])}") + + tokens = segment_tokens[-1][1:] + + # When the decoding hit the max limit (number of tokens) -- usually when the language model gets stuck -- + # then we have to recover the last token from what is send to the decoder + unfinished_decoding = has_reached_decoding_limit() + last_is_not_timestamp = 
len(tokens) and tokens[-1] < tokenizer.timestamp_begin + last_token_reliable = True + + if unfinished_decoding: + logger.debug(f"WARNING: decoding hit the max limit for segment {segment_tokens[-1]} (It usually happens when the language model gets stuck)") + # The last token chosen is in the prompt for the new chunk + if curr_tokens is not None and curr_tokens[0] == tokenizer.sot_prev: + index_sot = (curr_tokens == tokenizer.sot).nonzero(as_tuple=True) + assert len(index_sot) == 1 + index_sot = index_sot[0].item() + assert index_sot > 0 + last_token_fallback = curr_tokens[index_sot-1].item() + logger.debug(f" Guessed last token from the prompt for the new chunk: {last_token_fallback}") + # Fallback for the last segment, or without prompt: Assume greedy decoding + else: + last_token_fallback = torch.argmax(chunk_logprobs[-1]).item() if last_chunk_token is None else last_chunk_token + last_token_reliable = (temperature == 0) + logger.debug(f" Guess last token using probas (assuming greedy decoding): {last_token_fallback}") + if debug: + logger.debug(f"WARNING: also add last token: {tokenizer.decode_with_timestamps([last_token_fallback])}") + + tokens.append(last_token_fallback) + segment_tokens[-1].append(last_token_fallback) + attention_weights = [torch.cat(w, dim=-2) for w in segment_attweights] + last_logprobs = chunk_logprobs[-1] + elif last_is_not_timestamp: # was emitted early, without a timestamp before + logger.debug(f"WARNING: end timestamp not produced. 
Adding <|endoftext|>") + tokens.append(tokenizer.eot) + segment_tokens[-1].append(tokenizer.eot) + attention_weights = [torch.cat(w, dim=-2) for w in segment_attweights] + last_logprobs = chunk_logprobs[-1] + else: + attention_weights = [torch.cat(w[:-1], dim=-2) for w in segment_attweights] + last_logprobs = chunk_logprobs[-2] + + # Check prediction of last token + end_token = tokens[-1] + if end_token >= tokenizer.timestamp_begin: + start_token = tokens[0] + assert start_token >= tokenizer.timestamp_begin + # If Whisper prediction of the end is obviously wrong, we predict it again (constrained) + if end_token <= start_token: + new_end_token = last_logprobs[start_token+1:].argmax() + start_token + 1 + tokens[-1] = new_end_token.item() + if debug: + logger.debug(f"Re-estimated end token {tokenizer.decode_with_timestamps([new_end_token])} (was {tokenizer.decode_with_timestamps([end_token])}) to be after start token {tokenizer.decode_with_timestamps([start_token])}") + + if len(tokens) <= 1: + # Corner case: nothing in between timestamps + ws = [] + else: + ws = perform_word_alignment( + tokens, + attention_weights, + tokenizer, + use_space=should_use_space(language), + alignment_heads=alignment_heads, + remove_punctuation_from_words=remove_punctuation_from_words, + refine_whisper_precision_nframes=refine_whisper_precision_nframes, + detect_disfluencies=detect_disfluencies, + unfinished_decoding=unfinished_decoding, + mfcc=mfcc, + plot=plot_word_alignment, + debug=debug, + ) + + add_segment = len(ws) > 0 + if add_segment: + timestamped_word_segments.append(ws) + else: + logger.debug(f"Not added!") + reset(add_segment, not is_sot(curr_tokens)) + + return add_segment, unfinished_decoding, last_token_reliable + + def may_flush_segment(curr_tokens = None): + """ Add a speech segment with the new tokens if necessary. 
+ May also remove the last collected segments if filtered out by Whisper (no_speech_prob <= no_speech_threshold) + """ + nonlocal segment_tokens, segment_attweights, timestamped_word_segments, segment_logprobs, has_started, no_speech_prob, chunk_tokens, chunk_tokens_nosot, chunk_logprobs, mfcc, new_mfcc, logit_filters, index_begin_30sec_chunck, last_token_fallback, num_inference_steps, last_chunk_token + + # Check if a new segment should be added + unfinished_decoding = False + last_token_reliable = True + + if must_flush_segment(curr_tokens) and trust_whisper_timestamps: + _, unfinished_decoding, last_token_reliable = align_last_segment(curr_tokens) + + i_start = get_index_begin_30sec_chunck(curr_tokens) + + # All segments from previous 30sec chunck have been collected + if i_start is not None: + + if not trust_whisper_timestamps: + + tokens = torch.Tensor(segment_tokens[-1]).int() + idx_task = torch.where(tokens==tokenizer.sot_sequence[-1])[0][0].item() # index of <|transcribe|> + + is_special = tokens.ge(tokenizer.eot) + # Remove prompt + is_special[:idx_task] = True + # Keep begin timestamp + is_special[idx_task:idx_task+2] = False + + is_timestamp = tokens.ge(tokenizer.timestamp_begin) + consecutive = torch.where(is_timestamp[1:] & is_timestamp[:-1])[0] + if (WHIPSER_GE_20230306 or has_reached_decoding_limit()) and ( + (is_timestamp[-1] and not is_timestamp[-2]) if last_chunk_token is None else + last_chunk_token >= tokenizer.timestamp_begin and not is_timestamp[-2] + ): + consecutive = torch.cat([consecutive, torch.Tensor([len(tokens)-1]).int()]) + last_is_timestamp = True + if len(consecutive): + # Remove last tokens + is_special[consecutive[-1]+1:] = True + # Keep end timestamp + is_special[consecutive[-1]] = False + elif is_timestamp[-1]: + # Keep end timestamp + is_special[-1] = False + else: + last_is_timestamp = False + + if use_timestamps_for_alignment and len(consecutive): + # Keep all timestamps + is_special[idx_task+2:consecutive[-1]] = False + + # 
Do remove what has to be removed + is_next_achar = ~torch.cat([is_special[1:], torch.Tensor([False]).bool()]) + for i, weights in enumerate(segment_attweights): + assert len(weights) == len(tokens), f"{len(weights)} attention weights != {len(tokens)}" + # We must remove attention weights used to predict timestamp tokens + segment_attweights[i] = [w for s, w in zip(is_next_achar, weights) if s] + tokens_filtered = tokens[~is_special] + assert len(segment_attweights[0]) == len(tokens_filtered), f"{len(segment_attweights[0])} attention weights != {len(tokens_filtered)} " + + # Replace first and last timestamp + orig_start, orig_end = tokens_filtered[1].item(), tokens_filtered[-1].item() + tokens_filtered[1] = tokenizer.timestamp_begin # <|0.00|> + if last_is_timestamp: + tokens_filtered[-1] = tokenizer.timestamp_begin + N_FRAMES // 2 # <|30.00|> + segment_tokens[-1] = tokens_filtered.tolist() + + # Do alignment + added, unfinished_decoding, last_token_reliable = align_last_segment() + + # Re-split into segments (if necessary) + if added: + if len(consecutive) > 1: + segments_timestamped_concat = timestamped_word_segments[-1] + new_segments_timestamped = [] + new_segment_tokens = [] + start = idx_task+1 + i_word = 0 + for i, end in enumerate(consecutive): + end = end.item() + new_segment_tokens.append(tokens[start:end+1].tolist()) + if debug: + logger.debug(f"Add segment {len(timestamped_word_segments)+i}:\n\t{tokenizer.decode_with_timestamps(new_segment_tokens[-1])}") + total_length = end - start - 1 + start = end+1 + length = 0 + new_segments_timestamped.append([]) + while length < total_length: + if not use_timestamps_for_alignment and i_word == len(segments_timestamped_concat): + # This can happen in the case of "..." + assert total_length == 1 and i == len(consecutive)-1, "Unexpected situation!" 
+ break + assert i_word < len(segments_timestamped_concat), f"i_word={i_word} < len(segments_timestamped_concat)={len(segments_timestamped_concat)}" + word = segments_timestamped_concat[i_word] + new_segments_timestamped[-1].append(word) + length += len(word["tokens_indices"]) + i_word += 1 + # This can be non zero, when a punctuation (alone in a segment) is glued to the previous segment + if use_timestamps_for_alignment: + assert length == total_length, f"length={length} != total_length={total_length}" + elif length > total_length: + delta = length - total_length + word = new_segments_timestamped[-1][-1] + word_tokindices = word["tokens_indices"] + word_tokens = word["tokens"] + word["tokens_indices"] = word_tokindices[:-delta] + word["tokens"] = word_tokens[:-delta] + word["word"] = "".join(word_tokens[:-delta]) + i_word -= 1 + t = segments_timestamped_concat[i_word]["end"] + segments_timestamped_concat[i_word] = dict( + text="".join(word_tokens[-delta:]), + start=t, end=t, # Word without timestamp + tokens=word_tokens[-delta:], + tokens_indices=word_tokindices[-delta:], + ) + + assert i_word == len(segments_timestamped_concat) + + segment_tokens = segment_tokens[:-2] + new_segment_tokens + [segment_tokens[-1]] + timestamped_word_segments = timestamped_word_segments[:-1] + new_segments_timestamped + + else: + + # Recover start and end token + segment = segment_tokens[-2] + tokenizer.decode_with_timestamps([orig_start,orig_end]) + segment[0] = orig_start + if last_is_timestamp: + segment[-1] = orig_end + + if debug: + logger.debug(f"Add segment {len(timestamped_word_segments)}:\n\t{tokenizer.decode_with_timestamps(segment)}") + + if unfinished_decoding: + timestamped_word_segments[-1][-1]["avg_logprob_reliable"] = last_token_reliable + + reset(False) + + mfcc = new_mfcc + + n_segments = len(segment_tokens)-1 + + # Get word confidence and/or check if previous segments shoud have been skipped + should_skip = False + if compute_word_confidence or no_speech_threshold 
is not None: + + # no voice activity check + should_skip = (no_speech_prob > no_speech_threshold) if (no_speech_threshold is not None) else False + if compute_word_confidence or (should_skip and logprob_threshold is not None): + n = len(chunk_logprobs) + if n == len(chunk_tokens_nosot): + chunk_tokens_nosot = chunk_tokens_nosot[1:] + if unfinished_decoding: + assert last_token_fallback is not None + last_tokens = [last_token_fallback] + timestamped_word_segments[-1][-1]["avg_logprob_reliable"] = last_token_reliable + n += 1 + elif has_reached_decoding_limit(): + # there were segments in the 30sec chunck, and then the LM got stuck + last_tokens = [torch.argmax(chunk_logprobs[-1]).item()] + timestamped_word_segments[-1][-1]["avg_logprob_reliable"] = (temperature == 0) + else: + last_tokens = [tokenizer.eot] + chunck_indices = chunk_tokens_nosot + last_tokens + assert len(chunk_logprobs) == len(chunck_indices), f"{len(chunk_logprobs)} != {len(chunck_indices)}" + logprobs = torch.cat([logprob[i].unsqueeze(0) for (logprob, i) in zip(chunk_logprobs, chunck_indices)]) + assert min([p.isfinite().item() for p in logprobs]), \ + f"Got infinite logprob among ({len(logprobs)}) {[(i, tokenizer.decode_with_timestamps([i]), v.item()) for (i,v) in zip(chunck_indices, logprobs)]}" + sum_logprob = sum(logprobs) + avg_logprob = sum_logprob/n + # don't skip if the logprob is high enough, whatever the no_speech_prob is + if logprob_threshold is not None and avg_logprob > logprob_threshold: + should_skip = False + + if should_skip: + logger.debug(f"Skipping last {n_segments-i_start} segments (no_speech_prob {no_speech_prob} > {no_speech_threshold} and avg_logprob {avg_logprob} < {logprob_threshold})") + index_begin_30sec_chunck -= n_segments-i_start + segment_tokens = segment_tokens[:i_start] + [segment_tokens[-1]] + timestamped_word_segments = timestamped_word_segments[:i_start] + elif compute_word_confidence: + avg_logprob = avg_logprob.item() + i_token_end = -1 + for i in 
range(i_start, n_segments): + tokens = segment_tokens[i] + i_token_start = i_token_end + 1 + i_token_end = i_token_start + len(tokens) + assert chunck_indices[i_token_start:i_token_end] == tokens, f"Inconsistent token list {tokenizer.decode_with_timestamps(chunck_indices[i_token_start:i_token_end])} != {tokenizer.decode_with_timestamps(tokens)}" + i_token_start += 1 # skip sos (start time) + if not unfinished_decoding or i != n_segments-1: + i_token_end -= 1 # skip eos (end time) + segment_logprobs.append(logprobs[i_token_start:i_token_end]) + segment_avglogprobs.append(avg_logprob) + else: + for i in range(i_start, n_segments): + segment_logprobs.append(None) + segment_avglogprobs.append(None) + + else: + for i in range(i_start, n_segments): + segment_logprobs.append(None) + segment_avglogprobs.append(None) + + if verbose_bugged and not should_skip: + for segment in timestamped_word_segments[i_start:]: + for word in segment: + print_timestamped(word) + + # Reset counters + chunk_tokens = [] + chunk_tokens_nosot = [] + chunk_logprobs = [] + no_speech_prob = None + + def hook_attention_weights(layer, ins, outs, index): + nonlocal segment_attweights + # In old version of whisper, output is a single tensor + assert isinstance(outs, tuple) and len(outs) == 2, "whisper seems to be outdated, please update it (pip install --upgrade --no-deps --force-reinstall git+https://github.com/openai/whisper.git)" + if not has_started: + return + w = outs[-1] + # Only the last attention weights is useful + if w.shape[-2] > 1: + w = w[:, :, -1:, :] + segment_attweights[index].append(w.cpu()) + + def hook_mfcc(layer, ins, outs): + nonlocal new_mfcc, mfcc + new_mfcc = ins[0] + if mfcc is None: + mfcc = new_mfcc + + def hook_input_tokens(layer, ins, outs): + nonlocal segment_tokens, sot_index, chunk_tokens, chunk_tokens_nosot, logit_filters, has_started, language, num_inference_steps + num_inference_steps += 1 + + curr_tokens = ins[0] + assert curr_tokens.shape[0] == 1, "Batch decoding 
is not supported" + curr_tokens = curr_tokens.squeeze(0) + + if is_sot(curr_tokens): + chunk_prompt = curr_tokens.tolist() + if language is None: + if len(curr_tokens) > 1: + language = tokenizer.decode(curr_tokens[-2:-1]) + language = language[2:-2] # remove trailing "<|" and "|>" + whisper_options["language"] = language + + if verbose and not whisper_options["verbose"] and len(curr_tokens) > 1: + # Reproduce whisper verbose (2/2) + print(f"Detected language: {whisper.tokenizer.LANGUAGES[language].title()}") + sys.stdout.flush() + + logit_filters = get_logit_filters(model, whisper_options, prompt = chunk_prompt[1:-len(tokenizer.sot_sequence)]) + + may_flush_segment(curr_tokens) + + # Get the index of the <|startoftranscript|> tokens (to get proba of silence later) + if is_sot(curr_tokens): + has_started = len(curr_tokens) > 1 or not model.is_multilingual + if no_speech_threshold is not None: + sot_index = curr_tokens.tolist().index(tokenizer.sot) + else: + sot_index = None + + # Keep the last token only + if has_started: + segment_tokens[-1].append(curr_tokens[-1].item()) + + # Accumulate tokens + if has_started: + chunk_tokens.append(curr_tokens) + if not is_sot(curr_tokens): + chunk_tokens_nosot.append(curr_tokens[-1].item()) + else: + if verbose and not whisper_options["verbose"]: + # Reproduce whisper verbose (1/2) + print("Detecting language using up to the first 30 seconds. 
Use `--language` to specify the language") + + embedding_weights = None + def hook_output_logits(layer, ins, outs): + nonlocal no_speech_prob, chunk_logprobs, segment_tokens, chunk_tokens, chunk_tokens_nosot, last_chunk_token, embedding_weights, has_started, language, language_probs + + if embedding_weights is None: + embedding_weights = torch.transpose(model.decoder.token_embedding.weight, 0, 1).to(outs[0].dtype) + + # Get the probability of silence + if sot_index is not None and no_speech_prob is None: + logits = (outs[0][sot_index,:] @ embedding_weights).float() + logits = logits.softmax(dim=-1) + no_speech_prob = logits[tokenizer.no_speech].item() + + # Get language probabilities + if language is None and sot_index is not None and model.is_multilingual: + index_start = tokenizer.sot + 1 + index_end = index_start + len(tokenizer.all_language_tokens) + logits = (outs[0][sot_index,:] @ embedding_weights).float() + language_probs = logits[index_start:index_end].softmax(dim=-1) + language_probs = dict(zip(whisper.tokenizer.LANGUAGES, language_probs.tolist())) + + # Get the log-probabilities of tokens (we don't know yet which one will be chosen) + if has_started: + logits = (outs[0][-1:,:] @ embedding_weights).float() + tokens = torch.cat(chunk_tokens).unsqueeze(0) + for logit_filter in logit_filters: + logit_filter.apply(logits, tokens) + logits = F.log_softmax(logits.squeeze(0), dim=-1) + chunk_logprobs.append(logits) + + if WHIPSER_GE_20230306 and has_reached_decoding_limit(): + last_chunk_token = torch.argmax(logits).item() + else: + last_chunk_token = None + + try: + + # Add hooks to the model, to get tokens and attention weights on the fly + all_hooks = [] + all_hooks.append(model.encoder.conv1.register_forward_hook(hook_mfcc)) + all_hooks.append(model.decoder.token_embedding.register_forward_hook(hook_input_tokens)) + nblocks = len(model.decoder.blocks) + j = 0 + for i, block in enumerate(model.decoder.blocks): + if i < nblocks - 
word_alignment_most_top_layers: + continue + all_hooks.append( + block.cross_attn.register_forward_hook( + lambda layer, ins, outs, index=j: hook_attention_weights(layer, ins, outs, index)) + ) + j += 1 + if compute_word_confidence or no_speech_threshold is not None: + all_hooks.append(model.decoder.ln.register_forward_hook(hook_output_logits)) + + transcription = model.transcribe(audio, **whisper_options) + + finally: + + # Remove hooks + for hook in all_hooks: + hook.remove() + + # Finalize (collect last segment) + may_flush_segment() + segment_tokens.pop(-1) + + token_special_idx = min(tokenizer.sot, tokenizer.eot) + def filter_tokens(tokens): + while len(tokens) and tokens[0] >= token_special_idx: + tokens = tokens[1:] + while len(tokens) and tokens[-1] >= token_special_idx: + tokens = tokens[:-1] + return tokens + + assert len(segment_tokens) == len(timestamped_word_segments), f"Inconsistent number of segments: tokens ({len(segment_tokens)}) != timestamped_word_segments ({len(timestamped_word_segments)})" + assert len(segment_avglogprobs) == len(segment_tokens), f"Inconsistent number of segments: avg logprobs ({len(segment_avglogprobs)}) != tokens ({len(segment_tokens)})" + assert len(segment_logprobs) == len(segment_tokens), f"Inconsistent number of segments: logprobs ({len(segment_logprobs)}) != tokens ({len(segment_tokens)})" + + whisper_segments = transcription["segments"] + # See issue 64: some segments may have empty text + if any(not s["text"] for s in whisper_segments): + whisper_segments = [s for s in whisper_segments if s["text"]] + l1 = len(whisper_segments) + l2 = len(timestamped_word_segments) + if l1 != l2 and l1 != 0: + logger.warning(f"Inconsistent number of segments: whisper_segments ({l1}) != timestamped_word_segments ({l2})") + assert l1 == l2 or l1 == 0, f"Inconsistent number of segments: whisper_segments ({l1}) != timestamped_word_segments ({l2})" + + logger.debug("Compile results") + words = [] + for i, (segment, timestamped_words, token, 
avglogprob, logprobs) in enumerate(zip(whisper_segments, timestamped_word_segments, segment_tokens, segment_avglogprobs, segment_logprobs)): + timestamped_tokens = filter_tokens(token) + whisper_tokens = filter_tokens(segment["tokens"]) + if timestamped_tokens != whisper_tokens: + if len(timestamped_tokens) == len(whisper_tokens) + 1: + logger.warning(f"An additional token was added on segment {i}") + elif WHIPSER_GE_20230306 and len(whisper_tokens) == 0: + logger.warning(f"Whisper has empty segment {i}") + assert segment["end"] == segment["start"], f"Fatal Error: Got empty segment {i} with non-zero duration" + segment["tokens"] = timestamped_tokens + segment["text"] = tokenizer.decode(timestamped_tokens) + else: + assert len(timestamped_tokens) < len(whisper_tokens) and timestamped_tokens == whisper_tokens[:len(timestamped_tokens)], \ + f"Fatal Error: Got inconsistent text for segment {i}:\n({len(timestamped_tokens)})\n{tokenizer.decode_with_timestamps(timestamped_tokens)}\n{timestamped_tokens}\n!=\n({len(whisper_tokens)})\n{tokenizer.decode_with_timestamps(whisper_tokens)}\n{whisper_tokens[:len(timestamped_tokens)]}" + segment["tokens"] = token if WHIPSER_GE_20230306 else timestamped_tokens # tokens include special timestamp tokens since 20230306 + segment["text"] = tokenizer.decode(segment["tokens"]) + logger.warning(f"Text had to be shortned on segment {i}:\n{tokenizer.decode(timestamped_tokens)}\n!=\n{tokenizer.decode(whisper_tokens)}") + timestamped_words[-1]["avg_logprob_reliable"] = False + + offset = segment["seek"] * HOP_LENGTH / SAMPLE_RATE + for timestamped_word in timestamped_words: + timestamped_word["start"] += offset + timestamped_word["end"] += offset + timestamped_word["idx_segment"] = i + + if compute_word_confidence: + if "avg_logprob_reliable" not in timestamped_words[-1] or timestamped_words[-1]["avg_logprob_reliable"]: + # assert abs(segment["avg_logprob"] - avglogprob) < 1e-2, f"Fatal Error: Got inconsistent logprob for segment {i}: 
{segment['avg_logprob']} != {avglogprob}" + if abs(segment["avg_logprob"] - avglogprob) >= 1e-2: + logger.warning(f"Recomputed different logprob for segment {i}: {avglogprob} != {segment['avg_logprob']}") + if include_punctuation_in_confidence: + segment["confidence"] = round_confidence(logprobs.mean().exp().item()) + else: + logprobs_nopunc = [] + i_end = 0 + for timestamped_word in timestamped_words: + i_start = i_end + tokens = timestamped_word["tokens"] + i_end += len(tokens) + + assert i_end <= len(logprobs), f"Fatal Error: Got out-of-bound index for segment {i}: {i_end} > {len(logprobs)}" + if include_punctuation_in_confidence: + word_logprobs = logprobs[i_start:i_end] + else: + while len(tokens) > 1 and len(tokens[-1]) and tokens[-1][-1] in _punctuation: # Note: look at the last character of token, to take into account "...", "!!", etc. + tokens = tokens[:-1] + word_logprobs = logprobs[i_start:i_start + len(tokens)] + logprobs_nopunc.append(word_logprobs) + + timestamped_word["confidence"] = round_confidence(word_logprobs.mean().exp().item() if len(word_logprobs) else 0.0) + + if i_end not in [len(logprobs), len(logprobs)-1]: + logger.warning(f"Got inconsistent length for segment {i} ({len(logprobs)} != {i_end}). 
Some words have been ignored.") + if not include_punctuation_in_confidence: + logprobs_nopunc = torch.cat(logprobs_nopunc) + segment["confidence"] = round_confidence(logprobs_nopunc.mean().exp().item()) + + words.extend(timestamped_words) + + if language_probs: + transcription["language_probs"] = language_probs + + return transcription, words + +def _transcribe_timestamped_naive( + model, + audio, + remove_punctuation_from_words, + compute_word_confidence, + include_punctuation_in_confidence, + refine_whisper_precision_nframes, + use_backend_timestamps, + alignment_heads, + plot_word_alignment, + word_alignment_most_top_layers, + detect_disfluencies, + trust_whisper_timestamps, + min_word_duration, + **whisper_options, +): + verbose = whisper_options["verbose"] + whisper_options["verbose"] = None if whisper_options["verbose"] is True else whisper_options["verbose"] # We will print intermediate results ourselves + language = whisper_options["language"] + refine_whisper_precision_sec = refine_whisper_precision_nframes * AUDIO_TIME_PER_TOKEN + + word_alignment_most_top_layers = float("inf") if word_alignment_most_top_layers is None else word_alignment_most_top_layers + + audio = get_audio_tensor(audio) + audio_duration = audio.shape[-1] / SAMPLE_RATE + + if verbose and language is None and not whisper_options["verbose"]: + # Reproduce whisper verbose (1/2) + print("Detecting language using up to the first 30 seconds. 
Use `--language` to specify the language") + + tokenizer = get_tokenizer(model, task=whisper_options["task"], language=language) + + transformer_backend = is_transformer_model(model) + if transformer_backend: + # Additional options specific to transformer models + whisper_options["remove_punctuation_from_words"] = remove_punctuation_from_words + whisper_options["use_token_timestamps"] = use_backend_timestamps + else: + whisper_options["word_timestamps"] = use_backend_timestamps + + language_probs = None + def hook_output_logits(layer, ins, outs): + nonlocal language_probs, tokenizer + + # Get language probabilities + if language is None and language_probs is None: + if outs.shape[1] == 1: + embedding_weights = torch.transpose(model.decoder.token_embedding.weight, 0, 1).to(outs[0].dtype) + index_start = tokenizer.sot + 1 + index_end = index_start + len(tokenizer.all_language_tokens) + logits = (outs[0][0,:] @ embedding_weights).float() + language_probs = logits[index_start:index_end].softmax(dim=-1) + language_probs = dict(zip(whisper.tokenizer.LANGUAGES, language_probs.tolist())) + else: + language_probs = False + + all_hooks = [] + if model.is_multilingual: + all_hooks.append(model.decoder.ln.register_forward_hook(hook_output_logits)) + + try: + model.alignment_heads = alignment_heads # Avoid exception "AttributeError: 'WhisperUntied' object has no attribute 'alignment_heads'. 
Did you mean: 'set_alignment_heads'?"" + transcription = model.transcribe(audio, **whisper_options) + finally: + for hook in all_hooks: + hook.remove() + + if not transformer_backend and verbose and language is None and not whisper_options["verbose"]: + # Reproduce whisper verbose (2/2) + print(f"Detected language: {whisper.tokenizer.LANGUAGES[transcription['language']].title()}") + sys.stdout.flush() + + # End early if timestamps have been computed by the backend + if transcription.get("segments") and "words" in transcription["segments"][0]: + words = [] + for i_segment, segment in enumerate(transcription["segments"]): + ws = segment.pop("words", []) + for w in ws: + # Rename openai-whisper -> whisper-timestamped + if "word" in w: w["text"] = w.pop("word") + if "probability" in w: w["confidence"] = round_confidence(w.pop("probability")) + w["idx_segment"] = i_segment + words.extend(ws) + if language_probs: + transcription["language_probs"] = language_probs + return transcription, words + + language = norm_language(transcription.get("language", language)) + use_space = should_use_space(language) + + n_mels = model.dims.n_mels if hasattr(model.dims, "n_mels") else 80 + + attention_weights = [[] for _ in range(min(word_alignment_most_top_layers, len(model.decoder.blocks)))] + + try: + + all_hooks = [] + + # Hook the model + nblocks = len(model.decoder.blocks) + j = 0 + for i, block in enumerate(model.decoder.blocks): + if i < nblocks - word_alignment_most_top_layers: + continue + def hook(layer, ins, outs, index=j): + if is_transformer_model(model): + attention_weights[index] = outs[1].log() + else: + attention_weights[index] = outs[1] + all_hooks.append( + block.cross_attn.register_forward_hook( + hook + # lambda layer, ins, outs, index=j: attention_weights.__setitem__(index, outs[1]) + ) + ) + j += 1 + + + # When not relying on Whisper timestamps + current_tokens = [] + token_to_idx_segment = [] + + words = [] + previous_end = 0 + whisper_segments = 
transcription["segments"] + for i_segment, segment in enumerate(whisper_segments): + + # Note: this could also be a fix to issue #61 where a "<|te|>" token was predicted + # segment["tokens"] = [t for t in segment["tokens"] if t < tokenizer.eot or t >= tokenizer.timestamp_begin] + + start = end = tokens = None + if trust_whisper_timestamps: + + start = segment["start"] + end = segment["end"] + if end < start: + # Whisper is wrong on the prediction of segment end + end = min(audio_duration, start + SEGMENT_DURATION) + + start_margin_min = start - refine_whisper_precision_sec + start_margin_max = start + refine_whisper_precision_sec + if start >= audio_duration - min_word_duration or (previous_end >= start_margin_min and previous_end <= start_margin_max): + # Make start as accurate as possible (as the decoding will start with timestamp <|0|>) + start = previous_end + else: + # Fallback + start = start_margin_min + + if start > audio_duration - min_word_duration: + # Skip last segment if too short + logger.warning(f"Skipping segment outside of audio duration {audio_duration} (original: {segment['start']}-{segment['end']}, new: {start}-XXX)") + continue + + end_margin_min = end - refine_whisper_precision_sec + end_margin_max = end + refine_whisper_precision_sec + if i_segment < len(whisper_segments) - 1: + # Try to enforce: + # end + min_word_duration <= next start + refine_whisper_precision_sec + end_margin_max2 = whisper_segments[i_segment + 1]["start"] + refine_whisper_precision_sec - min_word_duration + if end_margin_max2 >= end_margin_min: + end_margin_max = min(end_margin_max2, end_margin_max) + end = min(audio_duration, end_margin_max) + + if end < start + min_word_duration: + logger.warning(f"Got super short segment (original from whisper: {segment['start']}-{segment['end']}, new: {start, end})") + end = min(audio_duration, start + min_word_duration) + if end <= start: + logger.warning(f"Skipping this short segment occuring too close to the end of the audio") + 
continue + + tokens = segment["tokens"] + + else: + + seek = segment["seek"] + new_tokens = segment["tokens"] + if not len(new_tokens): + continue + # Add timestamps that will be needed after + if new_tokens[0] < tokenizer.timestamp_begin: + relative_start = segment["start"] - (seek * HOP_LENGTH / SAMPLE_RATE) + start_token = round(relative_start * SAMPLE_RATE / AUDIO_SAMPLES_PER_TOKEN) + tokenizer.timestamp_begin + new_tokens = [start_token] + new_tokens + if new_tokens[-1] < tokenizer.timestamp_begin: + relative_end = segment["end"] - (seek * HOP_LENGTH / SAMPLE_RATE) + end_token = round(relative_end * SAMPLE_RATE / AUDIO_SAMPLES_PER_TOKEN) + tokenizer.timestamp_begin + new_tokens = new_tokens + [end_token] + + current_tokens.extend(new_tokens) + token_to_idx_segment.extend([i_segment] * len(new_tokens)) + + next_seek = whisper_segments[i_segment+1]["seek"] if i_segment < len(whisper_segments) - 1 else None + if seek != next_seek: + start = float(seek * HOP_LENGTH / SAMPLE_RATE) + assert start < audio_duration, f"Got start {start} which is outside of audio duration {audio_duration}" + end = min(start + SEGMENT_DURATION, audio_duration) + tokens = current_tokens + + if tokens is None or not len(tokens): + continue + + start_sample = min(round(start * SAMPLE_RATE), audio.shape[-1]) + end_sample = min(round(end * SAMPLE_RATE), audio.shape[-1]) + + # Extract features on the audio segment + sub_audio = audio_minimum_padding(audio[start_sample:end_sample]) + + mfcc = whisper.log_mel_spectrogram(sub_audio, n_mels).to(model.device) + mfcc = whisper.pad_or_trim(mfcc, N_FRAMES) + mfcc = mfcc.unsqueeze(0) + + segment_tokens_check = [] + if tokens[0] >= tokenizer.timestamp_begin: + segment_tokens_check.append(tokens[0]) + while tokens[0] >= tokenizer.timestamp_begin: + tokens = tokens[1:] + assert len(tokens), "Got transcription with only timestamps!" 
+ last_token_check = None + while tokens[-1] >= tokenizer.timestamp_begin: + last_token_check = tokens[-1] + tokens = tokens[:-1] + + sot_sequence = tokenizer.sot_sequence + if language and len(sot_sequence) == 3: + sot_sequence = ( + sot_sequence[0], + tokenizer.to_language_token(language), + sot_sequence[2], + ) + tokens = [ + *sot_sequence, + tokenizer.timestamp_begin, + ] + tokens + + i_start = len(sot_sequence) + + with torch.no_grad(): + logprobs = model(mfcc, torch.Tensor(tokens).int().to(model.device).unsqueeze(0)) + logprobs = F.log_softmax(logprobs, dim=-1) + + end_token = tokenizer.timestamp_begin + round(min(N_FRAMES * HOP_LENGTH, end_sample - start_sample) // AUDIO_SAMPLES_PER_TOKEN) + tokens = tokens[i_start:] + [end_token] + attention_weights = [w[:, :, i_start-1:, :] for w in attention_weights] + + ws = perform_word_alignment( + tokens, + attention_weights, + tokenizer, + use_space=use_space, + alignment_heads=alignment_heads, + remove_punctuation_from_words=remove_punctuation_from_words, + refine_whisper_precision_nframes=refine_whisper_precision_nframes, + detect_disfluencies=detect_disfluencies, + mfcc=mfcc, + plot=plot_word_alignment, + ) + + segment_logprobs = [] + i_token = 1 + + for word in ws: + + word["start"] = round(word["start"] + start, 2) + word["end"] = round(word["end"] + start, 2) + + if trust_whisper_timestamps: + word.update({"idx_segment": i_segment}) + else: + assert i_token < len(tokens) + assert not len(word["tokens_indices"]) or word["tokens_indices"][0] == tokens[i_token] + word.update({"idx_segment": token_to_idx_segment[i_token]}) + i_token += len(word["tokens"]) + while i_token < len(tokens) and tokens[i_token] >= tokenizer.timestamp_begin: + i_token += 1 + + tok_indices = word["tokens_indices"] + segment_tokens_check.extend(tok_indices) + + if compute_word_confidence: + tok = word["tokens"] + i_end = i_start + len(tok) + if include_punctuation_in_confidence: + while len(tok) > 1 and len(tok[-1]) and tok[-1][-1] in 
_punctuation: # Note: look at the last character of token, to take into account "...", "!!", etc. + tok = tok[:-1] + tok_indices = tok_indices[:-1] + word_logprobs = [logprobs[:, step, tok] for (step, tok) in zip(range(i_start, i_start + len(tok_indices)), tok_indices)] + i_start = i_end + if len(word_logprobs): + word_logprobs = torch.cat(word_logprobs) + segment_logprobs.append(word_logprobs) + word_confidence = word_logprobs.mean().exp().item() + else: + word_confidence = 0 + word.update({"confidence": round_confidence(word_confidence)}) + + words.append(word) + + if verbose: + print_timestamped(word) + + if last_token_check is not None: + segment_tokens_check.append(last_token_check) + if trust_whisper_timestamps: + if segment_tokens_check != segment["tokens"]: + assert len(segment_tokens_check) < len(segment["tokens"]), \ + f"First should be longer by one token: '{tokenizer.decode_with_timestamps(segment_tokens_check)}' should include '{tokenizer.decode_with_timestamps(segment['tokens'])}'" + assert segment_tokens_check[:-1] == segment["tokens"][:len(segment_tokens_check)-1], \ + f"Got inconsistent tokens: {tokenizer.decode_with_timestamps(segment_tokens_check)} != {tokenizer.decode_with_timestamps(segment['tokens'])}" + segment["tokens"] = segment_tokens_check + segment["text"] = tokenizer.decode(segment["tokens"]) + # else: TODO + + if len(segment_logprobs): + segment.update({"confidence": round_confidence(torch.cat(segment_logprobs).mean().exp().item())}) + + if len(ws): + previous_end = ws[-1]["end"] + + if not trust_whisper_timestamps: + current_tokens = [] + token_to_idx_segment = [] + + finally: + + # Remove hooks + for hook in all_hooks: + hook.remove() + + if language_probs: + transcription["language_probs"] = language_probs + + return (transcription, words) + +def get_audio_tensor(audio, device="cpu"): + if isinstance(audio, str): + audio = whisper.load_audio(audio) + if isinstance(audio, np.ndarray): + audio = torch.Tensor(audio) + else: + assert 
def audio_minimum_padding(audio):
    """
    Make sure the audio has more than 200 samples along its last dimension.

    Audio whose last dimension holds 200 samples or fewer is passed to
    whisper.pad_or_trim with a target length of 201; longer audio is returned unchanged.
    """
    if audio.shape[-1] <= 200:
        return whisper.pad_or_trim(audio, 201)
    return audio


def should_use_space(language):
    """
    Tell whether words should be split on spaces for the given language.

    Returns False for "zh", "ja", "th", "lo", "my" and "yue" (after normalization
    by norm_language), True for everything else.
    """
    return norm_language(language) not in ["zh", "ja", "th", "lo", "my", "yue"]

def norm_language(language):
    """
    Normalize a language name or code to a lowercase language code.

    language: str or None
        None maps to "en". Otherwise the lowercased value is looked up in
        whisper.tokenizer.TO_LANGUAGE_CODE; when it is not a key of that table,
        the lowercased value itself is returned.
    """
    if language is None:
        return "en"
    lowered = language.lower()
    # Fall back on the lowercased input (not the raw one), so that an upper-case
    # code such as "ZH" compares equal to the lower-case codes used by callers.
    return whisper.tokenizer.TO_LANGUAGE_CODE.get(lowered, lowered)

def print_timestamped(w):
    """
    Write one "[start --> end] text" line to stdout for the given word/segment dict
    (keys 'start', 'end' and 'text' are read), then flush.
    """
    line = f"[{format_timestamp(w['start'])} --> {format_timestamp(w['end'])}] {w['text']}\n"
    # compared to just `print(line)`, this replaces any character not representable using
    # the system default encoding with an '?', avoiding UnicodeEncodeError.
    sys.stdout.write(line.encode(sys.getdefaultencoding(), errors="replace").decode())
    sys.stdout.flush()


def get_logit_filters(model, whisper_options, prompt = None):
    """
    Build the logit filters of a whisper DecodingTask for the given options.

    model: model handed to whisper.decoding.DecodingTask
    whisper_options: dict of options; the keys dropped by get_decoding_options are ignored
    prompt: if None, the "initial_prompt" option (when present) is used as prompt

    Raises NotImplementedError when is_transformer_model(model) is true.
    """
    if is_transformer_model(model):
        # import transformers
        # transformers.WhisperTimeStampLogitsProcessor
        raise NotImplementedError("TODO")
    # get_decoding_options returns a fresh dict, so popping from it does not alter whisper_options
    decoding_options = get_decoding_options(whisper_options)
    if "initial_prompt" in decoding_options:
        prompt0 = decoding_options.pop("initial_prompt")
        if prompt is None:
            prompt = prompt0
    if prompt is not None:
        decoding_options["prompt"] = prompt
    decoding_options = whisper.DecodingOptions(
        without_timestamps=False,
        max_initial_timestamp=1.0,
        prefix=None,
        suppress_blank=True,
        **decoding_options
    )

    # This performs some checks on the options
    decoding_task = whisper.decoding.DecodingTask(model, decoding_options)
    return decoding_task.logit_filters

def get_decoding_options(whisper_options):
    """
    Return a copy of whisper_options without the keys "no_speech_threshold",
    "logprob_threshold", "compression_ratio_threshold", "condition_on_previous_text"
    and "verbose". The input dict is left untouched.
    """
    excluded = (
        "no_speech_threshold",
        "logprob_threshold",
        "compression_ratio_threshold",
        "condition_on_previous_text",
        "verbose",
    )
    return {k: v for (k, v) in whisper_options.items() if k not in excluded}
is_transformer_model(model): + tokenizer = model.tokenizer + tokenizer.sot_sequence = ( + tokenizer.sot, + tokenizer.to_language_token(language or "en"), + tokenizer.to_task_token(task), + ) + tokenizer.sot_sequence + return model.tokenizer + try: + return whisper.tokenizer.get_tokenizer( + model.is_multilingual, + num_languages=model.num_languages if hasattr(model, "num_languages") else 99, + task=task, language=language + ) + except TypeError: # Old openai-whisper version + return whisper.tokenizer.get_tokenizer( + model.is_multilingual, + task=task, language=language + ) + +def perform_word_alignment( + tokens, + attention_weights, + tokenizer, + use_space=True, + mfcc=None, + refine_whisper_precision_nframes=0, + remove_punctuation_from_words=False, + include_punctuation_in_timing=False, # Was True before 1.9 + unfinished_decoding=False, + alignment_heads=None, + medfilt_width=9, + qk_scale=1.0, + detect_disfluencies=True, + subwords_can_be_empty=True, # Was False before 1.11 + plot=False, + debug=False, +): + """ + Perform word alignment on the given tokens and attention weights. + Returns a list of (word, start_time, end_time) tuples. + + tokens: list of tokens (integers) + attention_weights: list of attention weights (torch tensors) + tokenizer: tokenizer used to tokenize the text + use_space: whether to use spaces to split the tokens into words (should be true for all languages except Japanese, Chinese, ...) + mfcc: MFCC features (used to identify padded region, and for plotting) + refine_whisper_precision_nframes: precision time + remove_punctuation_from_words: whether to remove punctuation from words + include_punctuation_in_timing: whether to include punctuation in the timing of (previous) words + unfinished_decoding: whether the decoding is unfinished (e.g. 
because the model is stuck) + alignment_heads: list of attention heads to use for alignment + medfilt_width: width of the median filter used to smooth the attention weights + qk_scale: scale factor applied to the attention weights + plot: whether to plot the word alignment + debug: whether to print debug information + """ + + assert len(tokens) > 1, f"Got unexpected sequence of tokens of length {len(tokens)} {tokenizer.decode_with_timestamps(tokens)}" + start_token = tokens[0] - tokenizer.timestamp_begin + end_token = tokens[-1] - tokenizer.timestamp_begin + + # Check start / end tokens + if start_token < 0: + raise RuntimeError(f"Missing start token in: {tokenizer.decode_with_timestamps(tokens)}") + if len(tokens) == 1 or end_token < 0: + # This can happens when Whisper is stucked as a Language Model + if debug: + logger.debug(f"Missing end token in {tokenizer.decode_with_timestamps(tokens)}") + end_token = N_FRAMES // 2 + if end_token == start_token and refine_whisper_precision_nframes == 0: + if debug: + logger.debug(f"Got empty segment in {tokenizer.decode_with_timestamps(tokens)}") + return [] + + # Let a minimal duration given the number of tokens (see https://github.com/linto-ai/whisper-timestamped/issues/67) + end_token = min(N_FRAMES // 2, max(end_token, start_token + len(tokens))) + + # Put some margin around the segment + if refine_whisper_precision_nframes > 0: + start_token = max(start_token - refine_whisper_precision_nframes, 0) + end_token = min(end_token + refine_whisper_precision_nframes, N_FRAMES // 2) + + if end_token <= start_token: + raise RuntimeError(f"Got segment with null or negative duration {tokenizer.decode_with_timestamps(tokens)}: {start_token} {end_token}") + + start_time = start_token * AUDIO_TIME_PER_TOKEN + # end_time = end_token * AUDIO_TIME_PER_TOKEN + + split_tokens = split_tokens_on_spaces if use_space else split_tokens_on_unicode + words, word_tokens, word_tokens_indices = split_tokens(tokens, tokenizer, 
remove_punctuation_from_words=remove_punctuation_from_words) + + # If the last token is a punctuation that comes after a word + # group this final punctuation with the final timestamp + # This is to avoid assigning the final punctuation to a big silence or a noise/music background coming after + num_punctuations_per_tokens = [ + 0 if len(w) == 1 or w[-1] not in _punctuation else 1 + for w in word_tokens + ] + if include_punctuation_in_timing: + num_punctuations_per_tokens[:-2]=[0]*(len(num_punctuations_per_tokens)-2) + + for i, w in enumerate(attention_weights): + assert w.shape[-2] == len(tokens), f"Attention weights have wrong shape: {w.shape[-2]} (expected {len(tokens)})." + weights = torch.cat(attention_weights) # layers * heads * tokens * frames + + num_tokens = weights.shape[-2] + num_frames = end_token - start_token + if num_tokens > num_frames: + logger.warning(f"Too much text ({num_tokens} tokens) for the given number of frames ({num_frames}) in: {tokenizer.decode_with_timestamps(tokens)}\nThe end of the text will be removed.") + return perform_word_alignment( + tokens[:num_frames-1] + [tokens[-1]], + [torch.cat([w[:, :, :num_frames-1, :], w[:, :, -1:, :]], dim=-2) + for w in attention_weights], + tokenizer, + use_space=use_space, + refine_whisper_precision_nframes=refine_whisper_precision_nframes, + medfilt_width=medfilt_width, + qk_scale=qk_scale, + alignment_heads=alignment_heads, + mfcc=mfcc, + plot=plot, + remove_punctuation_from_words=remove_punctuation_from_words, + detect_disfluencies=detect_disfluencies, + subwords_can_be_empty=subwords_can_be_empty, + unfinished_decoding=True, + debug=debug, + ) + + assert end_token <= weights.shape[-1] + assert len(tokens) == num_tokens + + weights = weights[..., start_token: end_token].cpu() # layers * heads * tokens * frames + + if alignment_heads is None: + weights = weights.reshape(-1, *weights.shape[-2:]) # N * tokens * frames + else: + weights = torch.stack([weights[l][h] for l, h in 
alignment_heads.indices().T]) + weights = median_filter(weights, (1, 1, medfilt_width)) + weights = torch.tensor(weights * qk_scale).softmax(dim=-1) + weights = weights.mean(axis=(0)) # average over layers and heads # tokens * frames + weights = weights / weights.norm(dim=-2, keepdim=True) # This was before the mean before 1.9 + weights = -weights.double().numpy() + worse_weight = 0 + + # Get the limit of audio duration + max_duration = None + if mfcc is not None: + max_duration = find_start_padding(mfcc) + if max_duration is not None: + max_duration = max_duration // 2 + + # Enforce the max duration + if max_duration: + if start_token >= max_duration: + logger.warning(f"Got start time outside of audio boundary") + else: + weights[:-1, max_duration:] = worse_weight + + # Encourage to start early + weights[0, 0] = weights.min() + # weights[0, refine_whisper_precision_nframes*2:] = worse_weight + + if subwords_can_be_empty: + step_pattern = dtw.stepPattern.symmetric1 + else: + # Similar as "symmetric1" but without the possibility to have the same timestamp for two tokens + step_pattern = dtw.stepPattern.StepPattern(dtw.stepPattern._c( + 1, 1, 1, -1, + 1, 0, 0, 1, + 2, 0, 1, -1, + 2, 0, 0, 1, + )) + alignment = dtw.dtw(weights, step_pattern=step_pattern) + + global num_alignment_for_plot + num_alignment_for_plot += 1 + + if plot: + import matplotlib.pyplot as plt + import matplotlib.ticker as ticker + + plot_mfcc = 1 if mfcc is not None else 0 + plot_disfluencies = 1 if detect_disfluencies else 0 + nplots = (1 + plot_mfcc + plot_disfluencies) + + plt.subplots(nplots, 1, figsize=(16, 9), gridspec_kw={'height_ratios': [3] + [1] * (nplots - 1)}) + plt.subplot(nplots, 1, 1, frameon=False) + + plt.imshow(-weights, aspect="auto") + plt.plot(alignment.index2s, alignment.index1s, color="red") + + xticks = np.arange(0, weights.shape[1], 1 / AUDIO_TIME_PER_TOKEN) + xticklabels = [round_timestamp(x) for x in xticks * AUDIO_TIME_PER_TOKEN + start_time] + + ylims = 
plt.gca().get_ylim() + + ax = plt.gca() + ax.tick_params('both', length=0, width=0, which='minor', pad=6) + + ax.yaxis.set_ticks_position("left") + ax.yaxis.set_label_position("left") + ax.invert_yaxis() + ax.set_ylim(ylims) + + major_ticks = [-0.5] + minor_ticks = [] + current_y = 0 + + for word, word_token in zip(words, word_tokens): + minor_ticks.append(current_y + len(word_token) / 2 - 0.5) + current_y += len(word_token) + major_ticks.append(current_y - 0.5) + + words_with_subwords = ["|".join(s).strip() for (w, s) in zip(words, word_tokens)] + + ax.yaxis.set_minor_locator(ticker.FixedLocator(minor_ticks)) + ax.yaxis.set_minor_formatter( + ticker.FixedFormatter(words_with_subwords)) + ax.set_yticks(major_ticks) + ax.yaxis.set_major_formatter(ticker.NullFormatter()) + for y in major_ticks: + plt.axhline(y, color="black", linestyle="dashed") + + plt.ylabel("Words") + + if plot_mfcc: + plt.xticks(xticks) + plt.setp(plt.gca().get_xticklabels(), visible=False) + + xticks *= 2 + + plt.subplot(nplots, 1, 2, frameon=False) + plt.imshow(mfcc[0, :, start_token * 2: end_token * 2].cpu(), aspect="auto", origin="lower") + plt.yticks([]) + plt.ylabel("MFCC") + + plt.xticks(xticks, xticklabels) + plt.xlabel("Time (s)") + + jumps = np.diff(alignment.index1s) + jumps = np.pad(jumps, (1, 0), constant_values=1) + jumps = jumps.astype(bool) + jumps = alignment.index2s[jumps] + jumps = np.pad(jumps, (0, 1), constant_values=alignment.index2s[-1]) + + jumps_start = jumps + disfluences = {} + if detect_disfluencies: + jumps_start = copy.copy(jumps) + + for (i_token, (tok, begin, end)) in enumerate(zip(tokens, jumps[:-1], jumps[1:])): + + # Find local maxima in the portion of attention weights + attention_weights = -weights[i_token, begin:end] + peaks, properties = find_peaks(attention_weights, + width=3, + prominence=0.02, + ) + # If more than + if len(peaks) > 1: + if "left_ips" in properties: + left = [round(x) for x in properties["left_ips"]] + else: + left = 
properties["left_bases"] + + new_begin = left[-1] + begin + + jumps_start[i_token] = new_begin + + if new_begin != begin: + is_punctuation = tokenizer.decode_with_timestamps([tok]) in _punctuation + if not is_punctuation: + disfluences[i_token] = (begin, jumps_start[i_token]) + else: + disfluences[i_token+1] = (begin, end) + + if plot: + plt.subplot(nplots, 1, 2 + plot_mfcc, frameon=False) + plt.plot(range(begin,end), attention_weights) + plt.xlim(0, end) + + for i, p in enumerate(peaks): + color = 'red' if (len(peaks)>1 and i1 else 'green' + barxxy(begin+properties["left_bases"], begin+properties["right_bases"], properties.get("prominences",[1]*len(properties["left_bases"])), alpha=0.5, + # put a line with a custom color + linewidth=1, edgecolor=color + ) + if "left_ips" in properties: + for left in properties["left_ips"]: + plt.vlines(begin+left, 0, 0.5, color='green', linestyle=':') + for right in properties["right_ips"]: + plt.vlines(begin+right, 0, 0.5, color='red', linestyle=':') + + + # display the word-level timestamps in a table + word_boundaries = np.cumsum([len(t) for t in word_tokens]) + word_boundaries = np.pad(word_boundaries, (1, 0)) + begin_times = jumps_start[word_boundaries[:-1]] + end_times = jumps[word_boundaries[1:] - num_punctuations_per_tokens] + + begin_times = begin_times * AUDIO_TIME_PER_TOKEN + end_times = end_times * AUDIO_TIME_PER_TOKEN + + if detect_disfluencies: + to_be_added = [] + i_start = 0 + for i_word, toks in enumerate(word_tokens[:-1]): + i_end = i_start + len(toks) + if i_start in disfluences and i_word > 0: + begin, end = disfluences[i_start] + begin *= AUDIO_TIME_PER_TOKEN + end *= AUDIO_TIME_PER_TOKEN + to_be_added.append((i_word, begin, end)) + i_start = i_end + # Add from the end to avoid messing up the indices + for (i_word, begin, end) in to_be_added[-1::-1]: + words.insert(i_word, DISFLUENCY_MARK) + word_tokens.insert(i_word, []) + word_tokens_indices.insert(i_word, []) + begin_times = np.insert(begin_times, i_word, 
begin) + end_times = np.insert(end_times, i_word, end) + + # Ignore start / end tokens + if not refine_whisper_precision_nframes: + begin_times[1] = begin_times[0] + if not refine_whisper_precision_nframes: + end_times[-2] = end_times[-1] + if unfinished_decoding: + words = words[1:] + word_tokens = word_tokens[1:] + word_tokens_indices = word_tokens_indices[1:] + begin_times = begin_times[1:] + end_times = end_times[1:] + else: + words = words[1:-1] + word_tokens = word_tokens[1:-1] + word_tokens_indices = word_tokens_indices[1:-1] + begin_times = begin_times[1:-1] + end_times = end_times[1:-1] + + if plot: + ymin = 1 + + plt.subplot(nplots, 1, 1) + for i, (w, ws, begin, end) in enumerate(zip(words, word_tokens, begin_times, end_times)): + ymax = ymin + len(ws) + if mfcc is None: + plt.text(begin / AUDIO_TIME_PER_TOKEN, num_tokens-0.5, w, ha="left", va="top", color="red") + for x in [begin, end,]: + plt.axvline(x / AUDIO_TIME_PER_TOKEN, color="red", linestyle="dotted", + ymin=1-ymin/num_tokens, + ymax=0, # 1-ymax/num_tokens, + ) + ymin = ymax + + if plot_mfcc: + plt.subplot(nplots, 1, 2) + for i, (w, begin, end) in enumerate(zip(words, begin_times, end_times)): + plt.text(begin * 2 / AUDIO_TIME_PER_TOKEN, mfcc.shape[-2]*1.05, w, ha="left", va="bottom", color="red") + for x in [begin, end,]: + plt.axvline(x * 2 / AUDIO_TIME_PER_TOKEN, color="red", linestyle="dotted") + + if isinstance(plot, str): + plt.savefig(f"{plot}.alignment{num_alignment_for_plot:03d}.jpg", bbox_inches='tight', pad_inches=0) + else: + plt.show() + + return [ + dict( + text=word, + start=round_timestamp(begin + start_time), + end=round_timestamp(end + start_time), + tokens=tokens, + tokens_indices=tokens_indices, + ) + for word, begin, end, tokens, tokens_indices in zip(words, begin_times, end_times, word_tokens, word_tokens_indices) + if not word.startswith("<|") + ] + +def find_start_padding(mfcc): + """ Return start of padding given the mfcc, or None if there is no padding """ + last_mfcc = 
def round_confidence(x):
    """Round a confidence score to 3 decimals."""
    return round(x, 3)

def round_timestamp(x):
    """Round a timestamp to 2 decimals."""
    return round(x, 2)

# ASCII punctuation except "-" and "'", followed by some extra non-ASCII punctuation marks
_punctuation = "".join(c for c in string.punctuation if c not in ["-", "'"]) + "。,!?:”、…"

def split_tokens_on_unicode(tokens: list, tokenizer, remove_punctuation_from_words=False, isolate_punctuations=False):
    """
    Group tokens into the smallest units that decode to valid text.

    tokens: list of token ids
    tokenizer: object providing decode_with_timestamps(), eot and timestamp_begin
    remove_punctuation_from_words: if True, punctuation attached to the previous word
        is left out of the word text (its tokens are still attached to that word)
    isolate_punctuations: if True, punctuation is never attached to the previous word

    Returns a tuple (words, word_tokens, word_tokens_indices) of three lists of same length:
        words: decoded text of each unit
        word_tokens: for each unit, the list of decoded sub-strings, with "" for tokens
            that could only be decoded together with the following ones
        word_tokens_indices: for each unit, the list of token ids
    """
    words = []
    word_tokens = []
    word_tokens_indices = []
    current_tokens = []

    for token in tokens:
        current_tokens.append(token)
        # Tokens in [eot, timestamp_begin) are left out of the decoded text
        decoded = tokenizer.decode_with_timestamps([t for t in current_tokens if t < tokenizer.eot or t >= tokenizer.timestamp_begin])
        # Wait for more tokens as long as the decoded text contains the replacement character
        if "\ufffd" not in decoded:
            empty_tokens = [""] * (len(current_tokens)-1)
            punctuation = not isolate_punctuations and (decoded.strip() and decoded.strip() in _punctuation)
            previous_special = len(word_tokens_indices) > 0 and (word_tokens_indices[-1][-1] >= tokenizer.timestamp_begin)
            if punctuation and not previous_special:
                if len(words) == 0:
                    # Punctuation comes first: create an empty unit to attach it to.
                    # The three lists must be seeded together, otherwise
                    # word_tokens_indices[-1] below raises an IndexError.
                    words = [""]
                    word_tokens = [[]]
                    word_tokens_indices = [[]]
                if not remove_punctuation_from_words:
                    words[-1] += decoded
                word_tokens[-1].extend(empty_tokens + [decoded])
                word_tokens_indices[-1].extend(current_tokens)
            else:
                words.append(decoded)
                word_tokens.append(empty_tokens + [decoded])
                word_tokens_indices.append(current_tokens)
            current_tokens = []

    return words, word_tokens, word_tokens_indices
def check_vad_method(method, with_version=False):
    """
    Check whether the VAD method is valid and return the method in a consistent format

    method: str or list or True or False
        - True / "True" / "true": default method ("silero")
        - None / False / "False" / "false" / "None" / "none": no VAD (None is returned)
        - non-string iterable: explicit list of (start, end) pairs, returned as a list of tuples
        - "silero" or "silero:<version>": silero VAD (a leading "v" is added to the version if missing)
        - "auditok": auditok VAD (the package must be importable)
        - any other string: parsed as a Python literal (e.g. "[(0, 1.5), (2, 3)]") and checked again
    with_version: bool
        if True, a silero method is returned as a tuple ("silero", version),
        where version is None when no version was specified

    Raises ValueError for an invalid silero version or an unrecognized method,
    ImportError when "auditok" is requested but not installed.
    """
    if method in [True, "True", "true"]:
        return check_vad_method("silero") # default method
    elif method in [None, False, "False", "false", "None", "none"]:
        return None
    elif not isinstance(method, str) and hasattr(method, '__iter__'):
        # list of explicit timestamps
        checked_pairs = []
        for s_e in method:
            assert len(s_e) == 2, f"Got unexpected element {s_e} in the list of VAD segments. Expect (start, end) pairs"
            checked_pairs.append(tuple(s_e))
        return checked_pairs
    elif isinstance(method, str) and method.startswith("silero"):
        version = None
        if method != "silero":
            assert method.startswith("silero:"), f"Got unexpected VAD method {method}"
            version = method.split(":")[1]
            if not version.startswith("v"):
                version = "v" + version
            # Explicit check (rather than an assert in a bare except), so that it is
            # not skipped with `python -O` and does not swallow unrelated exceptions
            try:
                is_valid_version = float(version[1:]) >= 1
            except ValueError:
                is_valid_version = False
            if not is_valid_version:
                raise ValueError(f"Got unexpected silero version {version} (please check https://github.com/snakers4/silero-vad/wiki/Version-history-and-Available-Models)")
        if with_version:
            return ("silero", version)
        else:
            return method
    elif method == "auditok":
        try:
            import auditok
        except ImportError:
            raise ImportError("Please install auditok to use the auditok VAD (or use another VAD method)")
    else:
        # SECURITY: the string may come from the outside (e.g. command line).
        # It is parsed with ast.literal_eval, which only accepts Python literals,
        # instead of eval(), which would execute arbitrary code.
        import ast
        try:
            parsed = ast.literal_eval(method)
        except Exception as err:
            raise ValueError(f"Got unexpected VAD method {method}") from err
        if not hasattr(parsed, '__iter__'):
            raise ValueError(f"Got unexpected VAD method {parsed}")
        return check_vad_method(parsed, with_version=with_version)
    return method

# Loaded silero VAD models, indexed by version (None when no version is specified)
_silero_vad_model = {}
# Whether onnxruntime can be imported (None as long as it was not tried)
_has_onnx = None
0 + + elif isinstance(method, str) and method.startswith("silero"): + + version = None + _, version = check_vad_method(method, True) + # See discussion https://github.com/linto-ai/whisper-timestamped/pull/142/files#r1398326287 + need_folder_hack = version and (version < "v4") + + if _silero_vad_model.get(version) is None: + # ONNX support since 3.1 in silero + if (version is None or version >= "v3.1") and (_has_onnx is not False): + onnx=True + try: + import onnxruntime + onnxruntime.set_default_logger_severity(3) # Remove warning "Removing initializer 'XXX'. It is not used by any node and should be removed from the model." + _has_onnx = True + except ImportError as err: + logger.warning(f"Please install onnxruntime to use more efficiently silero VAD") + _has_onnx = False + onnx=False + else: + onnx=False + + # Choose silero version because of problems with version 4, see https://github.com/linto-ai/whisper-timestamped/issues/74 + torch_home = os.environ.get('TORCH_HOME', '~/.cache/torch') + repo_or_dir_master = os.path.expanduser(torch_home + "/hub/snakers4_silero-vad_master") + repo_or_dir_specific = os.path.expanduser(torch_home + f"/hub/snakers4_silero-vad_{version}") if version else repo_or_dir_master + repo_or_dir = repo_or_dir_specific + tmp_folder = None + def apply_folder_hack(): + nonlocal tmp_folder + if os.path.exists(repo_or_dir_master): + tmp_folder = repo_or_dir_master + ".tmp" + shutil.move(repo_or_dir_master, tmp_folder) + # Make a symlink to the v3.1 model, otherwise it fails + input_exists = os.path.exists(repo_or_dir_specific) + if not input_exists: + # Make dummy file for the symlink to work + os.makedirs(repo_or_dir_specific, exist_ok=True) + os.symlink(repo_or_dir_specific, repo_or_dir_master) + if not input_exists: + shutil.rmtree(repo_or_dir_specific) + + source = "local" + if not os.path.exists(repo_or_dir): + # Load specific version of silero + repo_or_dir = f"snakers4/silero-vad:{version}" if version else "snakers4/silero-vad" + source = 
"github" + if need_folder_hack: + apply_folder_hack() + try: + silero_vad_model, utils = torch.hub.load(repo_or_dir=repo_or_dir, model="silero_vad", onnx=onnx, source=source) + _silero_vad_model[version] = silero_vad_model + except ImportError as err: + raise RuntimeError(f"Please install what is needed to use the silero VAD (or use another VAD method)") from err + except Exception as err: + raise RuntimeError(f"Problem when installing silero with version {version}. Check versions here: https://github.com/snakers4/silero-vad/wiki/Version-history-and-Available-Models") from err + finally: + if need_folder_hack: + if os.path.exists(repo_or_dir_master): + os.remove(repo_or_dir_master) + if tmp_folder: + shutil.move(tmp_folder, repo_or_dir_master) + assert os.path.isdir(repo_or_dir_specific), f"Unexpected situation: missing {repo_or_dir_specific}" + + _silero_get_speech_ts = utils[0] + + # Cheap normalization of the volume + audio = audio / max(0.1, audio.abs().max()) + + segments = _silero_get_speech_ts(audio, _silero_vad_model[version], + sampling_rate = sample_rate, + min_speech_duration_ms = round(min_speech_duration * 1000), + min_silence_duration_ms = round(min_silence_duration * 1000), + return_seconds = False, + ) + + elif method == "auditok": + import auditok + + # Cheap normalization of the volume + audio = audio / max(0.1, audio.abs().max()) + + data = (audio.numpy() * 32767).astype(np.int16).tobytes() + + audio_duration = len(audio) / sample_rate + + segments = auditok.split( + data, + sampling_rate=sample_rate, # sampling frequency in Hz + channels=1, # number of channels + sample_width=2, # number of bytes per sample + min_dur=min_speech_duration, # minimum duration of a valid audio event in seconds + max_dur=audio_duration, # maximum duration of an event + max_silence=min(audio_duration*.95, min_silence_duration), # maximum duration of tolerated continuous silence within an event + energy_threshold=50, + drop_trailing_silence=True, + ) + + segments = 
[{"start": s._meta.start * sample_rate, "end": s._meta.end * sample_rate} for s in segments] + + else: + raise ValueError(f"Got unexpected VAD method {method}") + + if dilatation > 0: + dilatation = round(dilatation * sample_rate) + new_segments = [] + for seg in segments: + new_seg = { + "start": max(0, seg["start"] - dilatation), + "end": min(len(audio), seg["end"] + dilatation) + } + if len(new_segments) > 0 and new_segments[-1]["end"] >= new_seg["start"]: + new_segments[-1]["end"] = new_seg["end"] + else: + new_segments.append(new_seg) + segments = new_segments + + ratio = 1 if output_sample else 1 / sample_rate + + if ratio != 1: + for seg in segments: + seg["start"] *= ratio + seg["end"] *= ratio + if output_sample: + for seg in segments: + seg["start"] = round(seg["start"]) + seg["end"] = round(seg["end"]) + return segments + +def remove_non_speech(audio, + use_sample=False, + min_speech_duration=0.1, + min_silence_duration=1, + dilatation=0.5, + sample_rate=SAMPLE_RATE, + method="silero", + avoid_empty_speech=False, + plot=False, + ): + """ + Remove non-speech segments from audio (using Silero VAD), + glue the speech segments together and return the result along with + a function to convert timestamps from the new audio to the original audio + + parameters: + audio: torch.Tensor + audio data *in 16kHz* + use_sample: bool + if True, return start and end in samples instead of seconds + min_speech_duration: float + minimum duration (in sec) of a speech segment + min_silence_duration: float + minimum duration (in sec) of a silence segment + dilatation: float + how much (in sec) to enlarge each speech segment detected by the VAD + method: str + method to use to remove non-speech segments + avoid_empty_speech: bool + if True, avoid returning an empty speech segment (re) + plot: bool or str + if True, plot the result. 
def do_convert_timestamps(segments, t, t2 = None):
    """
    Map timestamps measured on the audio made of the glued segments
    back to positions in the original audio.

    parameters:
        segments: non-empty list of tuple (start, end), positions in the original audio
            of the pieces that were concatenated (in that order) to build the shorter audio
        t: timestamp to convert
        t2: second timestamp to convert (optional), when the two timestamps should be in the same segment

    returns:
        the converted timestamp rounded to 2 decimals if t2 is None,
        otherwise the list of the two converted timestamps (each rounded to 2 decimals)
    """
    assert len(segments)
    has_second = t2 is not None
    shift = 0  # What must be added to a glued-audio position to get the original position
    glued_end = 0  # End of the current segment on the glued audio
    previous_stop = 0  # End of the previous segment on the original audio
    candidates = []
    for seg_start, seg_stop in segments:
        glued_end = glued_end + (seg_stop - seg_start)
        shift += seg_start - previous_stop
        previous_stop = seg_stop
        first_inside = t <= glued_end
        second_inside = (t2 <= glued_end) if has_second else first_inside
        if not (first_inside or second_inside):
            continue
        # Clip the converted positions to the boundaries of the segment
        converted_first = max(seg_start, min(seg_stop, shift + t))
        converted_second = max(seg_start, min(seg_stop, shift + t2)) if has_second else None
        candidates.append([converted_first, converted_second])
        if first_inside and second_inside:
            break

    if not candidates:
        # Timestamp(s) after the end of the last segment
        candidates.append([shift + t, shift + t2 if has_second else None])

    if len(candidates) > 1:
        # Minimize difference between durations
        candidates = sorted(candidates, key=lambda pair: abs(abs(t2-t) - abs(pair[1]-pair[0])))

    best = candidates[0]
    if not has_second:
        return round(best[0], 2)
    return [round(value, 2) for value in best]
else: + raise RuntimeError(f"\"{text}\" not ending with \"{full_word}\"") + text = text[:-len(full_word)] + if i > 0 and words[i-1]["idx_segment"] == idx_segment: + segment["text"] = text + else: + logger.debug(f"Removing empty segment {idx_segment}") + # Remove segment with no more words + transcription["segments"].pop(idx_segment) + for j in range(i+1, len(words)): + words[j]["idx_segment"] -= 1 + recompute_text = True + + for i in to_remove: + words.pop(i) # Warning: inplace modification + + if recompute_text: + transcription["text"] = "".join([s["text"] for s in transcription["segments"]]) + + return transcription, words + + +def ensure_increasing_positions(segments, min_duration=0): + """ + Ensure that "start" and "end" come in increasing order + """ + has_modified_backward = False + previous_end = 0 + for i, seg in enumerate(segments): + if seg["start"] < previous_end: + assert i > 0 + new_start = round_timestamp((previous_end + seg["start"]) / 2) + if new_start < segments[i-1]["start"] + min_duration: + new_start = previous_end + else: + segments[i-1]["end"] = new_start + has_modified_backward = True + seg["start"] = new_start + if seg["end"] <= seg["start"] + min_duration: + seg["end"] = seg["start"] + min_duration + previous_end = seg["end"] + if has_modified_backward: + return ensure_increasing_positions(segments, min_duration) + + previous_end = 0 + for seg in segments: + seg["start"] = round_timestamp(seg["start"]) + seg["end"] = round_timestamp(seg["end"]) + assert seg["start"] >= previous_end, f"Got segment {seg} coming before the previous finishes ({previous_end} > {seg['start']})" + assert seg["end"] >= seg["start"], f"Got segment {seg} with end < start" + previous_end = seg["end"] + + return segments + +## Some utilities for writing transcripts to files + +def flatten(list_of_lists, key = None): + for sublist in list_of_lists: + for item in sublist.get(key, []) if key else sublist: + yield item + +def remove_keys(list_of_dicts, key): + for d in 
def write_csv(transcript, file, sep = ",", text_first=True, format_timestamps=None, header=False):
    """
    Write segments as delimited rows (text, start, end) into an open text file.

    Parameters
    ----------
    transcript : iterable of dict
        Items with "text", "start" and "end" keys. The text is stripped before writing.
    file : file-like object
        Destination handed to `csv.writer`.
    sep : str
        Column delimiter.
    text_first : bool
        If True, columns are (text, start, end); otherwise (start, end, text).
    format_timestamps : callable, optional
        Applied to each "start" and "end" value. If None, values are written unchanged.
    header : bool or list
        True writes a default header matching the column order; any other truthy value
        is written as the header row itself; a falsy value writes no header.
    """
    to_timestamp = format_timestamps if format_timestamps is not None else (lambda value: value)
    csv_writer = csv.writer(file, delimiter=sep)

    if header is True:
        header = ["text", "start", "end"] if text_first else ["start", "end", "text"]
    if header:
        csv_writer.writerow(header)

    # Build all rows before writing anything, as a single batch
    rows = []
    for segment in transcript:
        if text_first:
            row = [segment["text"].strip(), to_timestamp(segment["start"]), to_timestamp(segment["end"])]
        else:
            row = [to_timestamp(segment["start"]), to_timestamp(segment["end"]), segment["text"].strip()]
        rows.append(row)
    csv_writer.writerows(rows)
def get_alignment_heads(model, max_top_layer=3):
    """
    Return the alignment heads (cross-attention heads used for word timing) of a model.

    If the model already exposes an `alignment_heads` attribute, it is returned as is.
    Otherwise the model is identified from its number of parameters, and the heads are
    decoded from the table of known models. Returns None (after logging a warning) when
    the number of parameters does not match any known model.

    Note: `max_top_layer` is accepted but not used by this function.
    """
    if hasattr(model, "alignment_heads"): # Since version 20230306
        return model.alignment_heads

    parameter_count = _get_number_of_parameters(model)
    dims = model.dims
    n_layers, n_heads = dims.n_text_layer, dims.n_text_head

    resolved_name = _PARAMETERS_TO_MODEL_NAME.get(parameter_count)
    if resolved_name is None:
        logger.warning("Could not retrieve alignment heads : taking all attention heads from the top layers")
        return None

    if resolved_name == "large":
        # "large" is ambiguous: the sign of the very first weight decides between v1 and v2
        first_weights = next(model.parameters())
        resolved_name = "large-v1" if first_weights[0,0,0] > 0 else "large-v2"

    return _get_alignment_heads(resolved_name, n_layers, n_heads)
def load_model(
    name: str,
    device: Optional[Union[str, torch.device]] = None,
    backend: str = DEFAULT_BACKEND,
    download_root: Optional[str] = None,
    in_memory: bool = False,
):
    """
    Load a model from the given name or path.

    Parameters
    ----------
    name : str
        Name of the model or path to the model.
        Examples:
        - OpenAI-Whisper identifier: "large-v3", "medium.en", ...
        - HuggingFace identifier: "openai/whisper-large-v3", "distil-whisper/distil-large-v2", ...
        - File name: "path/to/model.pt", "path/to/model.ckpt", "path/to/model.bin"
        - Folder name: "path/to/folder". The folder must contain either "pytorch_model.bin", "model.safetensors", or sharded versions of those, or "whisper.ckpt".
    device : str or torch.device, optional
        Device to use. If None, use CUDA if there is a GPU available, otherwise CPU.
    backend : str, optional
        Backend to use. Either "transformers" or "openai-whisper" ("openai" is also accepted).
    download_root : str, optional
        Root folder to download the model to. If None, use the default download root (typically: ~/.cache)
    in_memory : bool, optional
        Whether to preload the model weights into host memory.
        Only forwarded to whisper.load_model (ignored with the "transformers" backend).

    Returns
    -------
    A TransformerWhisperAsOpenAIWhisper wrapper with the "transformers" backend,
    otherwise a whisper model (possibly a WhisperUntied when the checkpoint has
    an untied projection layer).

    Raises
    ------
    ImportError
        If the transformers library is needed but not installed.
    ValueError
        If the backend is unknown.
    RuntimeError
        If no model file can be found for a HuggingFace identifier or local folder.
    """
    if backend == "transformers":
        try:
            import transformers
        except ImportError:
            raise ImportError("If you want to use transformers backend, please install first the transformers library")
        if name in whisper.available_models():
            name = f"openai/whisper-{name}"
        # TODO: does in_memory makes sense?
        # Note: no trailing comma here, otherwise cache_dir would be a 1-tuple instead of a path (or None)
        cache_dir = os.path.join(download_root, "huggingface", "hub") if download_root else None
        try:
            generation_config = transformers.GenerationConfig.from_pretrained(name, cache_dir=cache_dir)
        except OSError:
            # Fall back to the generation config of a reference model
            generation_config = transformers.GenerationConfig.from_pretrained("openai/whisper-tiny", cache_dir=cache_dir)
        processor = transformers.WhisperProcessor.from_pretrained(name, cache_dir=cache_dir)
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        precision = torch.float32
        model = transformers.WhisperForConditionalGeneration.from_pretrained(
            name,
            # load_in_8bit=True,
            # load_in_4bit=True,
            torch_dtype=precision,
            # torch_dtype=torch.bfloat16,
            # attn_implementation="flash_attention_2",
            # attn_implementation="sdpa",
            cache_dir=cache_dir,
        )
        # model = model.to_bettertransformer()

        model = model.to(device)
        return TransformerWhisperAsOpenAIWhisper(model, processor, generation_config, precision)

    elif backend not in ["openai", "openai-whisper"]:
        raise ValueError(f"Got unexpected backend {backend}")

    extension = os.path.splitext(name)[-1] if os.path.isfile(name) else None

    if name in whisper.available_models() or extension == ".pt":
        return whisper.load_model(
            name,
            device=device,
            download_root=os.path.join(download_root, "whisper") if download_root else None,
            in_memory=in_memory
        )

    # Otherwise, assume transformers
    if extension in [".ckpt", ".bin"]:
        model_path = name
    else:
        # Search for the cached file (download if necessary)
        try:
            import transformers
        except ImportError:
            raise ImportError(f"If you are trying to download a HuggingFace model with {name}, please install first the transformers library")
        from transformers.utils import cached_file

        kwargs = dict(
            cache_dir=os.path.join(download_root, "huggingface", "hub") if download_root else None,
            use_auth_token=None,
            revision=None,
        )
        try:
            model_path = cached_file(name, "pytorch_model.bin", **kwargs)
        except OSError as err:
            try:
                model_path = None
                for candidate in ["whisper.ckpt", "pytorch_model.bin.index.json", "model.safetensors", "model.safetensors.index.json"]:
                    try:
                        model_path = cached_file(name, candidate, **kwargs)
                    except OSError:
                        continue
                    if candidate.endswith("index.json"):
                        # Sharded checkpoint: the index maps each weight to the file that contains it
                        index_file = model_path
                        with open(index_file) as f:
                            mapping = json.load(f)
                        assert "weight_map" in mapping
                        assert isinstance(mapping["weight_map"], dict)
                        model_path = list(set(mapping["weight_map"].values()))
                        folder = os.path.dirname(index_file)
                        model_path = [os.path.join(folder, p) for p in model_path]
                    break
                assert model_path is not None
            except Exception:
                # Exception (and not a bare except) so that KeyboardInterrupt / SystemExit are not swallowed
                raise RuntimeError(f"Original error: {err}\nCould not find model {name} from HuggingFace nor local folders.")
    # Load HF Model
    hf_state_dict = torch_load(model_path)

    # Rename layers
    for key in list(hf_state_dict.keys())[:]:
        new_key = hf_to_whisper_states(key)
        if new_key is None:
            hf_state_dict.pop(key)
        elif new_key != key:
            hf_state_dict[new_key] = hf_state_dict.pop(key)


    # Init Whisper Model and replace model weights
    dims = whisper.model.ModelDimensions(**states_to_dim(hf_state_dict))

    if "proj_out.weight" in hf_state_dict:
        hf_state_dict["decoder.proj_out.weight"] = hf_state_dict.pop("proj_out.weight")
        logger.warning("Using untied projection layer")
        whisper_model = WhisperUntied(dims)
    else:
        whisper_model = whisper.model.Whisper(dims)

    whisper_model.load_state_dict(hf_state_dict)
    del hf_state_dict
    if hasattr(whisper_model, "alignment_heads"):
        del whisper_model.alignment_heads # Will be recomputed later
    # NOTE(review): device is passed unchanged here, so None is not resolved to CUDA/CPU
    # in this branch (unlike the two branches above) -- confirm this is intended
    whisper_model = whisper_model.to(device)
    return whisper_model
model_path.endswith(".safetensors"): + from safetensors import safe_open + hf_state_dict = {} + with safe_open(model_path, framework="pt", device="cpu") as f: + for k in f.keys(): + hf_state_dict[k] = f.get_tensor(k) + else: + hf_state_dict = torch.load(model_path, map_location="cpu") + return hf_state_dict + +# Some helpers to manage transformers/openai-whisper model + +class TransformerWhisperAsOpenAIWhisper: + """ + Wrapper to use a transformers model as a whisper model (at least in whisper-timestamped) + """ + + def __init__(self, model, processor, generation_config, precision): + + self.model = model # transformers.WhisperForConditionalGeneration + self.processor = processor # transformers.WhisperProcessor + self.generation_config = generation_config # transformers.GenerationConfig + + self.device = model.device + self.precision = precision + + # Dimensions + model_config = model.config + self.dims = whisper.model.ModelDimensions( + n_mels = model_config.num_mel_bins, # model.get_encoder().get_input_embeddings().in_channels, # 80 + n_audio_ctx = model_config.max_source_positions, # 1500 + n_audio_state = model_config.d_model, # model.get_encoder().get_input_embeddings().out_channels, # 768 + n_audio_head = model_config.encoder_attention_heads, # model.get_encoder().layers[0].self_attn.num_heads, + n_audio_layer = model_config.encoder_layers, # len(model.get_encoder().layers), + n_vocab = model_config.vocab_size, # model.get_decoder().get_input_embeddings().num_embeddings, # ~51865 + n_text_ctx = model_config.max_length, # 448 + n_text_state = model_config.d_model, # model.get_decoder().get_input_embeddings().embedding_dim, # 768 + n_text_head = model_config.decoder_attention_heads, # model.get_decoder().layers[0].self_attn.num_heads, + n_text_layer = model_config.decoder_layers, # len(model.get_decoder().layers), + ) + + # Tokenization + self.tokenizer = processor.tokenizer + ( + self.tokenizer.sot, + self.tokenizer.eot, + self.tokenizer.timestamp_begin, + 
self.tokenizer.no_speech, + self.tokenizer.no_timestamps, + ) = self.tokenizer.convert_tokens_to_ids([ + "<|startoftranscript|>", + "<|endoftext|>", + "<|0.00|>", + "<|nospeech|>", + "<|notimestamps|>", + ]) + if self.tokenizer.decode([self.tokenizer.timestamp_begin], decode_with_timestamps=True) != "<|0.00|>": + # Sometimes, the tokenizer is weird and it is impossible to get the timestamp_begin token easily (e.g. with "qanastek/whisper-tiny-french-cased") + logger.warning("Getting timestamp_begin token is not straightforward for this model") + i = self.tokenizer.no_timestamps + 1 + maxi = i + 1000 + while self.tokenizer.decode([i], decode_with_timestamps=True) != "<|0.00|>": + i += 1 + if i == maxi: + raise RuntimeError("Could not find timestamp_begin token") + self.tokenizer.timestamp_begin = i + + self.tokenizer.all_language_tokens = self.tokenizer.convert_tokens_to_ids([ + t for t in self.tokenizer.additional_special_tokens if len(t) in [6,7] + ]) + # Update old Whisper generation config (ex: error: "The generation config is outdated and is thus not compatible with the `task` argument to `generate` [...] 
update the generation config as per the instructions https://github.com/huggingface/transformers/issues/25084#issuecomment-1664398224") + if not hasattr(self.generation_config, "lang_to_id"): + self.generation_config.lang_to_id = dict( + (self.tokenizer.decode(itoken), itoken) + for itoken in self.tokenizer.all_language_tokens + ) + if not hasattr(self.generation_config, "task_to_id"): + self.generation_config.task_to_id = dict( + (task, self.tokenizer.encode("<|" + task + "|>", add_special_tokens=False)[0]) + for task in ["transcribe", "translate"]) + self.tokenizer.to_language_token = lambda language: self.generation_config.lang_to_id["<|" + norm_language(language) + "|>"] + self.tokenizer.to_task_token = lambda task: self.generation_config.task_to_id[task] + + self.tokenizer.to_timestamp_token = lambda t: self.tokenizer.encode(f"<|{t:0.2f}|>", add_special_tokens=False)[0] + self.tokenizer.decode_with_timestamps = lambda tokens: self.tokenizer.decode(tokens, decode_with_timestamps=True) + + self.generation_config.no_timestamps_token_id = self.tokenizer.no_timestamps + self.model.generation_config = self.generation_config + + # Access to layers (renamed attributes) + self.decoder = self.model.get_decoder() + self.decoder.ln = self.decoder.layer_norm + self.decoder.token_embedding = self.decoder.embed_tokens + self.decoder.blocks = self.decoder.layers + for block in self.decoder.blocks: + block.cross_attn = block.encoder_attn + + # From the config + if hasattr(generation_config, "is_multilingual"): + self.is_multilingual = generation_config.is_multilingual + else: + self.is_multilingual = generation_config.is_multilingual = (self.tokenizer.sot != 50257) + + # Alignment heads + if hasattr(generation_config, "alignment_heads"): + a = generation_config.alignment_heads + self.alignment_heads = torch.sparse_coo_tensor(np.array(a).transpose(), [True]*len(a)).coalesce().to(self.device) + + def named_parameters(self): + return self.model.named_parameters() + + def 
transcribe(self, audio, use_token_timestamps=False, **kwargs): + + # Decoding options + # TODO: double check that this setup is correct + generation_config = self.generation_config + generation_config.num_beams = kwargs.get("beam_size", None) or 1 + temperature = kwargs.get("temperature", 0.0) + if isinstance(temperature, (list, tuple)): + # Not supported with transformers + temperature = min(temperature) + if temperature != 0.0: + generation_config.do_sample = True + generation_config.temperature = temperature + generation_config.top_k = kwargs.get("best_of", None) + + initial_prompt = kwargs.get("initial_prompt") + prompt_ids = self.processor.get_prompt_ids(initial_prompt) if (initial_prompt and initial_prompt.strip()) else None + + generate_kwargs = dict( + return_dict_in_generate = True, + return_segments = True, + return_timestamps = True, + return_token_timestamps = use_token_timestamps, + max_length = self.dims.n_text_ctx, + is_multilingual = self.is_multilingual, + prompt_ids = prompt_ids, + generation_config = generation_config, + ) + if self.is_multilingual: + generate_kwargs["language"] = generate_kwargs.get("language") + generate_kwargs["task"] = generate_kwargs.get("task", "transcribe") + + # Extract features + features = self.processor( + audio, + return_tensors="pt", + sampling_rate=16_000, + truncation=False, + ).input_features.to(self.device) + + # Transcribe + output = self.model.generate( + features.to(self.precision), + **generate_kwargs + ) + + # Because the output format is different when there is only one segment (e.g. audio duration < 30 seconds)... 
(WTF) + if "segments" not in output: + tokens = output.sequences[0] + new_output = { + "segments": [[{ + "tokens": tokens[1:], + "start": torch.tensor(0.0), + "result": { + "sequences": output.sequences[0], + "past_key_values": output.past_key_values, + } + }]] + } + if use_token_timestamps: + new_output["segments"][0][0]["result"]["token_timestamps"] = output.token_timestamps[0] + output = new_output + + # Language detection + first_segment_tokens = output["segments"][0][0]["tokens"].tolist() + if self.tokenizer.sot in first_segment_tokens: + i_sot = first_segment_tokens.index(self.tokenizer.sot) + else: + i_sot = -1 + if self.is_multilingual: + language = self.tokenizer.decode([first_segment_tokens[i_sot+1]], decode_with_timestamps=True) + assert len(language) in [6,7], f"Unexpected language detected: '{language}' ({first_segment_tokens[i_sot+1]}) in '{self.tokenizer.decode(first_segment_tokens, decode_with_timestamps=True)}'" + language = language[2:-2] + else: + language = "en" + + if use_token_timestamps: + remove_punctuation_from_words = kwargs.get("remove_punctuation_from_words", False) + use_space = should_use_space(language) + + full_text = "" + segments = [] + for id, (segment_dict, segment) in enumerate(self._iter_segments(output, prompt_ids)): + + segment_dict = segment_dict | { + "temperature": temperature, + # "avg_logprob": -0.6982866287231445, + # "compression_ratio": 0.5294117647058824, + # "no_speech_prob": 0.019023602828383446 + } + + # Accumulate + if use_token_timestamps: + tokens = segment_dict["tokens_no_timestamp"] + offset = segment_dict["offset"] + all_tokens = segment["result"]["sequences"].tolist() + token_timestamps = segment["result"]["token_timestamps"] + assert len(all_tokens) == len(token_timestamps) + n_tokens = len(tokens) + for i in range(0, len(all_tokens) + 1 - n_tokens): + if all_tokens[i:i+n_tokens] == tokens: + token_timestamps = token_timestamps[i:i+n_tokens+1] + break + assert len(tokens)+1 == len(token_timestamps) + 
split_tokens = split_tokens_on_spaces if use_space else split_tokens_on_unicode + words, word_tokens, word_tokens_indices = split_tokens(tokens, self.tokenizer, remove_punctuation_from_words=remove_punctuation_from_words) + words_dicts = [] + i_end = 0 + for w, toks in zip(words, word_tokens_indices): + i_start = i_end + i_end = i_start + len(toks) + words_dicts.append({ + "text": w, + "start": offset + token_timestamps[i_start].item(), + "end": offset + token_timestamps[i_end].item(), + # "probability": 0.199 + }) + segment_dict["words"] = words_dicts + + segment_dict.pop("tokens_no_timestamp") + segment_dict.pop("offset") + segments.append(segment_dict) + full_text += segment_dict["text"] + + output_dict = { + "text": full_text, + "segments": segments, + } + if not kwargs.get("language"): + output_dict["language"] = language + + return output_dict + + def _iter_segments(self, output, prompt_ids): + + id = -1 + for sub_segments in output["segments"]: + for segment in sub_segments: + id += 1 + chunk_start = round(max(0, segment["start"].item()), 2) + tokens = segment["tokens"] + if id == 0 and prompt_ids is not None: + tokens = tokens[len(prompt_ids):] + time_tokens = [(i, t.item()) for i, t in enumerate(tokens) if t >= self.tokenizer.timestamp_begin] + i = 0 + while i < len(time_tokens): + i_start, token_start = time_tokens[i] + relative_start = round((token_start - self.tokenizer.timestamp_begin) * AUDIO_TIME_PER_TOKEN, 2) + assert relative_start >= 0 + if i == 0: + offset = chunk_start - relative_start + assert offset >= 0, f"Got negative offset ({offset}) with {chunk_start=} and {relative_start=}" + has_end = i + 1 < len(time_tokens) + if has_end: + i_end, token_end = time_tokens[i+1] + # Ends on either consecutive timestamps, or the next timestamp followed by <|endoftext|> + while i + 2 < len(time_tokens): + if time_tokens[i+2][0] == i_end + 1: break + if i_end + 1 >= len(tokens) or tokens[i_end+1] in [self.tokenizer.eot]: break + logger.warning(f"Unexpected 
def hf_to_whisper_states(text):
    """
    Convert the name of a HuggingFace (transformers) Whisper weight into its openai-whisper name.

    Returns None for weights that must be ignored ("_mel_filters", and any name containing "default").
    A leading "base_model.model." prefix is removed before renaming.
    """
    # From Speechbrain
    if text == "_mel_filters":
        return None

    # From PEFT
    if "default" in text:
        return None
    peft_prefix = "base_model.model."
    if text.startswith(peft_prefix):
        text = text[len(peft_prefix):]

    # Regular expressions applied in this exact order.
    # WARNING: the dots are unescaped on purpose and match any character:
    # for instance '.self_attn.' must also match ".self_attn_" (in "self_attn_layer_norm"),
    # so that 'attn.layer_norm.' can turn it into "attn_ln." afterwards.
    renaming_rules = (
        ('.layers.', '.blocks.'),
        ('.self_attn.', '.attn.'),
        ('.q_proj.', '.query.'),
        ('.k_proj.', '.key.'),
        ('.v_proj.', '.value.'),
        ('.out_proj.', '.out.'),
        ('.fc1.', '.mlp.0.'),
        ('.fc2.', '.mlp.2.'),
        ('.fc3.', '.mlp.3.'),
        ('.fc3.', '.mlp.3.'),
        ('.encoder_attn.', '.cross_attn.'),
        ('.cross_attn.ln.', '.cross_attn_ln.'),
        ('.embed_positions.weight', '.positional_embedding'),
        ('.embed_tokens.', '.token_embedding.'),
        ('model.', ''),
        ('attn.layer_norm.', 'attn_ln.'),
        ('.final_layer_norm.', '.mlp_ln.'),
        ('encoder.layer_norm.', 'encoder.ln_post.'),
        ('decoder.layer_norm.', 'decoder.ln.'),
    )
    for pattern, replacement in renaming_rules:
        text = re.sub(pattern, replacement, text)
    return text
class TextDecoderUntied(whisper.model.TextDecoder):
    """
    Text decoder with untied weights: the output projection is a dedicated
    linear layer (`proj_out`, without bias) created in addition to the token embedding.
    """
    def __init__(self, *args, **kwargs):
        import torch
        super().__init__(*args, **kwargs)

        # Same dimensions as the token embedding matrix
        vocab_size, state_size = self.token_embedding.weight.shape
        self.proj_out = torch.nn.Linear(state_size, vocab_size, bias=False)

    def forward(self, x, xa, kv_cache = None):
        # Position of the first token, given what is already in the cache (if any)
        if kv_cache:
            offset = next(iter(kv_cache.values())).shape[1]
        else:
            offset = 0
        positions = self.positional_embedding[offset : offset + x.shape[-1]]
        hidden = (self.token_embedding(x) + positions).to(xa.dtype)

        for block in self.blocks:
            hidden = block(hidden, xa, mask=self.mask, kv_cache=kv_cache)

        hidden = self.ln(hidden)

        # The projection layer is cast to the dtype of the hidden states before being applied
        projection = self.proj_out.to(hidden.dtype)
        return projection(hidden).float()
except TypeError: + # Version <= 20230314 + return writer.write_result({"segments": transcript}, file) + def get_do_write(output_format): + return lambda transcript, file: do_write(transcript, file, output_format) + + write_txt = get_do_write("txt") + write_srt = get_do_write("srt") + write_vtt = get_do_write("vtt") + write_tsv = get_do_write("tsv") + + parser = argparse.ArgumentParser( + description='Transcribe a single audio with whisper and compute word timestamps', + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument('-v', '--version', help="show version and exit", action='version', version=f'{__version__}') + parser.add_argument('--versions', help="show versions (of whisper-timestamped and whisper) and exit", action='version', + version=f'{__version__} -- Whisper {whisper.__version__} in {os.path.realpath(os.path.dirname(whisper.__file__))}') + + parser.add_argument('audio', help="audio file(s) to transcribe", nargs='+') + parser.add_argument('--model', help=f"name of the Whisper model to use. 
Examples: {', '.join(whisper.available_models())}", default="small") + parser.add_argument("--model_dir", default=None, help="the path to save model files; uses ~/.cache/whisper by default", type=str) + parser.add_argument("--device", default=get_default_device(), help="device to use for PyTorch inference") + parser.add_argument("--backend", default=DEFAULT_BACKEND, help="Which backend to use", choices=["openai-whisper", "transformers"], type=str) + parser.add_argument("--output_dir", "-o", default=None, help="directory to save the outputs", type=str) + valid_formats = ["txt", "vtt", "srt", "tsv", "csv", "json"] + def str2output_formats(string): + if string == "all": + return valid_formats + formats = string.split(",") + for format in formats: + if format not in valid_formats: + raise ValueError(f"Expected one of {valid_formats}, got {format}") + return formats + parser.add_argument("--output_format", "-f", default="all", help=f"Format(s) of the output file(s). Possible formats are: {', '.join(valid_formats)}. Several formats can be specified by using commas (ex: \"json,vtt,srt\"). By default (\"all\"), all available formats will be produced", type=str2output_formats) + + parser.add_argument("--task", default="transcribe", help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')", choices=["transcribe", "translate"], type=str) + parser.add_argument('--language', help=f"language spoken in the audio, specify None to perform language detection.", choices=sorted(whisper.tokenizer.LANGUAGES.keys()) + sorted([k.title() for k in whisper.tokenizer.TO_LANGUAGE_CODE.keys()]), default=None) + # f"{', '.join(sorted(k+'('+v+')' for k,v in whisper.tokenizer.LANGUAGES.items()))} + + parser.add_argument('--vad', default=False, help="whether to run Voice Activity Detection (VAD) to remove non-speech segment before applying Whisper model (removes hallucinations). 
" + "Can be: True, False, auditok, silero (default when vad=True), silero:3.1 (or another version), or a list of timestamps in seconds (e.g. \"[(0.0, 3.50), (32.43, 36.43)]\"). " + "Note: Some additional libraries might be needed (torchaudio and onnxruntime for silero, auditok for auditok)." + ) + parser.add_argument('--detect_disfluencies', default=False, help="whether to try to detect disfluencies, marking them as special words [*]", type=str2bool) + parser.add_argument('--recompute_all_timestamps', default=not TRUST_WHISPER_TIMESTAMP_BY_DEFAULT, help="Do not rely at all on Whisper timestamps (Experimental option: did not bring any improvement, but could be useful in cases where Whipser segment timestamp are wrong by more than 0.5 seconds)", type=str2bool) + parser.add_argument("--punctuations_with_words", default=True, help="whether to include punctuations in the words", type=str2bool) + + parser.add_argument("--temperature", default=0.0, help="temperature to use for sampling", type=float) + parser.add_argument("--best_of", type=optional_int, default=None if USE_EFFICIENT_BY_DEFAULT else 5, help="number of candidates when sampling with non-zero temperature") + parser.add_argument("--beam_size", type=optional_int, default=None if USE_EFFICIENT_BY_DEFAULT else 5, help="number of beams in beam search, only applicable when temperature is zero") + parser.add_argument("--patience", type=float, default=None, help="optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search") + parser.add_argument("--length_penalty", type=float, default=None, help="optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses simple length normalization by default") + + parser.add_argument("--suppress_tokens", default="-1", help="comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations", 
type=str) + parser.add_argument("--initial_prompt", default=None, help="optional text to provide as a prompt for the first window.", type=str) + parser.add_argument("--condition_on_previous_text", default=True, help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop", type=str2bool) + parser.add_argument("--fp16", default=None, help="whether to perform inference in fp16; Automatic by default (True if GPU available, False otherwise)", type=str2bool) + + parser.add_argument("--temperature_increment_on_fallback", default=0.0 if USE_EFFICIENT_BY_DEFAULT else 0.2, help="temperature to increase when falling back when the decoding fails to meet either of the thresholds below", type=optional_float) + parser.add_argument("--compression_ratio_threshold", default=2.4, help="if the gzip compression ratio is higher than this value, treat the decoding as failed", type=optional_float) + parser.add_argument("--logprob_threshold", default=-1.0, help="if the average log probability is lower than this value, treat the decoding as failed", type=optional_float) + parser.add_argument("--no_speech_threshold", default=0.6, help="if the probability of the <|nospeech|> token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence", type=optional_float) + parser.add_argument("--threads", default=0, help="number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS", type=optional_int) + + parser.add_argument("--compute_confidence", default=True, help="whether to compute confidence scores for words", type=str2bool) + parser.add_argument("--verbose", type=str2bool, default=False, help="whether to print out the progress and debug messages of Whisper") + parser.add_argument('--plot', help="plot word alignments (save the figures if an --output_dir is 
specified, otherwhise just show figures that have to be closed to continue)", default=False, action="store_true") + parser.add_argument('--debug', help="print some debug information about word alignment", default=False, action="store_true") + + class ActionSetAccurate(argparse.Action): + def __init__(self, option_strings, dest, nargs=None, **kwargs): + assert nargs is None + super().__init__(option_strings, dest, nargs=0, **kwargs) + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, "best_of", 5) + setattr(namespace, "beam_size", 5) + setattr(namespace, "temperature_increment_on_fallback", 0.2) + parser.add_argument('--accurate', help="Shortcut to use the same default option as in openai-whisper (best_of=5, beam_search=5, temperature_increment_on_fallback=0.2)", action=ActionSetAccurate) + + class ActionSetEfficient(argparse.Action): + def __init__(self, option_strings, dest, nargs=None, **kwargs): + assert nargs is None + super().__init__(option_strings, dest, nargs=0, **kwargs) + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, "best_of", None) + setattr(namespace, "beam_size", None) + setattr(namespace, "temperature_increment_on_fallback", None) + parser.add_argument('--efficient', help="Shortcut to disable beam size and options that requires to sample several times, for an efficient decoding", action=ActionSetEfficient) + + parser.add_argument('--naive', help="use naive approach, doing inference twice (once to get the transcription, once to get word timestamps and confidence scores).", default=False, action="store_true") + + args = parser.parse_args().__dict__ + args.pop("accurate") + args.pop("efficient") + + temperature = args.pop("temperature") + temperature_increment_on_fallback = args.pop("temperature_increment_on_fallback") + if temperature_increment_on_fallback: + temperature = tuple(np.arange(temperature, 1.0 + 1e-6, temperature_increment_on_fallback)) + else: + temperature = 
[temperature] + + threads = args.pop("threads") + if threads: + torch.set_num_threads(threads) + + audio_files = args.pop("audio") + + model = args.pop("model") + device = args.pop("device") + model_dir = args.pop("model_dir") + + if device.lower().startswith("cuda"): + force_cudnn_initialization(device) + + output_format = args.pop("output_format") + backend = args.pop("backend") + + model = load_model(model, device=device, download_root=model_dir, backend=backend) + + plot_word_alignment = args.pop("plot") + + debug = args.pop("debug") + logging.basicConfig() + if debug: + logger.setLevel(logging.DEBUG) + # This supposes to plug a logger with name "WHISPER" into Whisper source code (no harm if it's not set) + logging.getLogger("WHISPER").setLevel(logging.DEBUG) + + output_dir = args.pop("output_dir") + if output_dir and not os.path.isdir(output_dir): + os.makedirs(output_dir) + + args["naive_approach"] = args.pop("naive") + args["remove_punctuation_from_words"] = not args.pop("punctuations_with_words") + args["compute_word_confidence"] = args.pop("compute_confidence") + args["trust_whisper_timestamps"] = not args.pop("recompute_all_timestamps") + + for audio_path in audio_files: + + outname = os.path.join(output_dir, os.path.basename(audio_path)) if output_dir else None + + result = transcribe_timestamped( + model, audio_path, + temperature=temperature, + plot_word_alignment=outname if (outname and plot_word_alignment) else plot_word_alignment, + **args + ) + + if output_dir: + + if "json" in output_format: + # save JSON + with open(outname + ".words.json", "w", encoding="utf-8") as js: + json.dump(result, js, indent=2, ensure_ascii=False) + + # save TXT + if "txt" in output_format: + with open(outname + ".txt", "w", encoding="utf-8") as txt: + write_txt(result["segments"], file=txt) + + # save VTT + if "vtt" in output_format: + with open(outname + ".vtt", "w", encoding="utf-8") as vtt: + write_vtt(remove_keys(result["segments"], "words"), file=vtt) + with 
def filtered_keys(result, keys=(
    "text",
    "segments", "words",
    "language",
    "start",
    "end",
    "confidence",
    "language_probs",
    "speech_activity",
)):
    """
    Recursively strip a transcription result down to a whitelist of keys.

    Args:
        result: the object to filter. Dicts and lists are traversed
            recursively; any other value is treated as a leaf.
        keys: container of dictionary keys to keep (anything supporting
            the ``in`` operator). The default is an immutable tuple so that
            it cannot be altered by accident between calls.

    Returns:
        - for a dict: a new dict holding only the entries whose key is in
          ``keys``; values are filtered recursively, except the value stored
          under "language_probs", which is passed through untouched;
        - for a list: a new list with every element filtered;
        - for a float: the value rounded to 2 decimals;
        - for anything else: the value itself, unchanged.
    """
    if isinstance(result, dict):
        kept = {}
        for key, value in result.items():
            if key not in keys:
                continue
            # "language_probs" is kept verbatim: its inner keys are not in
            # the whitelist, and its values are not rounded.
            kept[key] = value if key == "language_probs" else filtered_keys(value, keys)
        return kept
    if isinstance(result, list):
        return [filtered_keys(item, keys) for item in result]
    if isinstance(result, float):
        return round(result, 2)
    return result