feat: Init.

- Dockerfile +18 -0
- README.md +74 -6
- app.py +207 -0
- cards.py +145 -0
- record.js +130 -0
- requirements.txt +2 -0
- utils.py +9 -0
Dockerfile
ADDED
@@ -0,0 +1,18 @@
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
RUN apt-get update && apt-get install -y ffmpeg

COPY . .

ENV H2O_WAVE_LISTEN=":7860"
ENV H2O_WAVE_ADDRESS='http://127.0.0.1:7860'

CMD ["wave", "run", "app", "--no-reload"]
README.md
CHANGED
@@ -1,11 +1,79 @@
 ---
-title:
-emoji:
+title: H2O Wave NER Annotation
+emoji: 📝
 colorFrom: yellow
-colorTo:
+colorTo: gray
 sdk: docker
-
-license: apache-2.0
+app_port: 7860
 ---
 
-
+<div align='center'>
+
+<h1>WaveTon</h1>
+💯 Wave applications
+
+<br>
+<br>
+
+[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg?logo=apache)](https://github.com/vopani/waveton/blob/master/LICENSE)
+[![GitHub](https://img.shields.io/github/stars/vopani/waveton?color=yellowgreen&logo=github)](https://img.shields.io/github/stars/vopani/waveton?color=yellowgreen&logo=github)
+[![Twitter](https://img.shields.io/twitter/follow/vopani)](https://twitter.com/vopani)
+
+</div>
+
+## Whisper 🖥️
+
+Speech to text using OpenAI's Whisper model.
+
+![](demo.gif)
+
+## Setup ⚙️
+
+1. Check the version of Python: 3.9+ is required, and 3.10+ is recommended for the best experience
+
+```commandline
+python3 --version
+```
+
+2. Clone the repository
+
+```commandline
+git clone https://github.com/vopani/waveton.git
+```
+
+3. Create a virtual environment
+
+```commandline
+cd waveton/apps/deeplearning_apps/whisper
+python3 -m venv venv
+source venv/bin/activate
+```
+
+4. Install ffmpeg
+
+On Linux:
+
+```commandline
+sudo apt update && sudo apt install ffmpeg
+```
+
+On Mac:
+
+```commandline
+brew install ffmpeg
+```
+
+5. Install the packages
+
+```commandline
+python3 -m pip install -U pip
+python3 -m pip install -r requirements.txt
+```
+
+6. Run the application
+
+```commandline
+wave run app
+```
+
+7. View the application in your local browser: [http://localhost:10101](http://localhost:10101)
app.py
ADDED
@@ -0,0 +1,207 @@
import logging

from h2o_wave import Q, main, app, copy_expando, handle_on, on
import whisper

import cards
from utils import get_inline_script

# Set up logging
logging.basicConfig(format='%(levelname)s:\t[%(asctime)s]\t%(message)s', level=logging.INFO)


@app('/')
async def serve(q: Q):
    """
    Main entry point. All queries pass through this function.
    """

    try:
        # Initialize the app if not already
        if not q.app.initialized:
            await initialize_app(q)

        # Initialize the client if not already
        if not q.client.initialized:
            await initialize_client(q)

        # Update theme if toggled
        elif q.args.theme_dark is not None and q.args.theme_dark != q.client.theme_dark:
            await update_theme(q)

        # Run inference if audio is recorded
        elif q.events.audio:
            await audio_inference(q)

        # Delegate query to query handlers
        elif await handle_on(q):
            pass

        # Fallback condition, to help identify bugs
        else:
            await handle_fallback(q)

    except Exception as error:
        await show_error(q, error=str(error))


async def initialize_app(q: Q):
    """
    Initialize the app.
    """

    logging.info('Initializing app')

    # Set initial argument values
    q.app.cards = ['main', 'error']

    q.app.model = whisper.load_model('base')

    q.app.initialized = True


async def initialize_client(q: Q):
    """
    Initialize the client (browser tab).
    """

    logging.info('Initializing client')

    # Set initial argument values
    q.client.theme_dark = True

    # Add layouts, scripts, header and footer
    q.page['meta'] = cards.meta
    q.page['header'] = cards.header
    q.page['footer'] = cards.footer

    # Add cards for the main page
    q.page['asr'] = cards.asr()

    q.client.initialized = True

    await q.page.save()


async def update_theme(q: Q):
    """
    Update the theme of the app.
    """

    # Copy argument values to the client
    copy_expando(q.args, q.client)

    if q.client.theme_dark:
        logging.info('Updating theme to dark mode')

        # Update theme from light to dark mode
        q.page['meta'].theme = 'h2o-dark'
        q.page['header'].icon_color = 'black'
    else:
        logging.info('Updating theme to light mode')

        # Update theme from dark to light mode
        q.page['meta'].theme = 'light'
        q.page['header'].icon_color = '#FEC924'

    await q.page.save()


@on('start')
async def start_recording(q: Q):
    """
    Start recording audio.
    """

    logging.info('Starting recording')

    q.page['meta'].script = get_inline_script('startRecording()')
    q.page['asr'] = cards.asr(recording=True)

    await q.page.save()


@on('stop')
async def stop_recording(q: Q):
    """
    Stop recording audio.
    """

    logging.info('Stopping recording')

    q.page['meta'].script = get_inline_script('stopRecording()')
    q.page['asr'] = cards.asr()

    await q.page.save()


@on('audio')
async def audio_inference(q: Q):
    """
    Run ASR inference on the recorded audio.
    """

    logging.info('Running inference on recorded audio')

    audio_path = await q.site.download(q.events.audio.captured, '.')

    q.client.transcription = q.app.model.transcribe(audio_path)['text']

    q.page['asr'] = cards.asr(audio_path=q.events.audio.captured, transcription=q.client.transcription)

    await q.page.save()


def clear_cards(q: Q, card_names: list):
    """
    Clear cards from the page.
    """

    logging.info('Clearing cards')

    # Delete cards from the page
    for card_name in card_names:
        del q.page[card_name]


async def show_error(q: Q, error: str):
    """
    Display errors.
    """

    logging.error(error)

    # Clear all cards
    clear_cards(q, q.app.cards)

    # Format and display the error
    q.page['error'] = cards.crash_report(q)

    await q.page.save()


@on('reload')
async def reload_client(q: Q):
    """
    Reset the client.
    """

    logging.info('Reloading client')

    # Clear all cards
    clear_cards(q, q.app.cards)

    # Reload the client
    await initialize_client(q)


async def handle_fallback(q: Q):
    """
    Handle fallback cases.
    """

    logging.info('Adding fallback page')

    q.page['fallback'] = cards.fallback

    await q.page.save()
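
For reference, the transcription step in audio_inference() is just the standard Whisper API. A minimal standalone sketch of that call, assuming a local sound.wav (hypothetical file name) and ffmpeg on the PATH:

```python
# Minimal sketch of the Whisper call that audio_inference() wraps.
# 'sound.wav' is a hypothetical local file; ffmpeg must be on the PATH.
import whisper

model = whisper.load_model('base')       # downloads the weights on first use
result = model.transcribe('sound.wav')   # returns a dict with 'text', 'segments', 'language'
print(result['text'])
```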
cards.py
ADDED
@@ -0,0 +1,145 @@
import sys
import traceback

from h2o_wave import Q, expando_to_dict, ui

# App name
app_name = 'Whisper'

# Link to repo. Report bugs/features here :)
repo_url = 'https://github.com/vopani/waveton'
issue_url = f'{repo_url}/issues/new?assignees=vopani&labels=bug&template=error-report.md&title=%5BERROR%5D'

# JS scripts
encoder_url = 'https://cdn.jsdelivr.net/npm/opus-media-recorder@latest/encoderWorker.umd.js'
recorder_url = 'https://cdn.jsdelivr.net/npm/opus-media-recorder@latest/OpusMediaRecorder.umd.js'

with open('record.js', encoding='utf-8') as f:
    recorder_script = ui.inline_script(f.read())

# A meta card to hold the app's title, layouts, dialogs, theme and other meta information
meta = ui.meta_card(
    box='',
    title='WaveTon',
    layouts=[
        ui.layout(
            breakpoint='xs',
            zones=[
                ui.zone(name='header'),
                ui.zone(name='main'),
                ui.zone(name='footer')
            ]
        )
    ],
    theme='h2o-dark',
    scripts=[
        ui.script(encoder_url, asynchronous=False),
        ui.script(recorder_url, asynchronous=False)
    ],
    script=recorder_script
)

# The header shown on all the app's pages
header = ui.header_card(
    box='header',
    title='Whisper',
    subtitle="Speech to text using OpenAI's Whisper model",
    icon='Microphone',
    icon_color='black',
    items=[ui.toggle(name='theme_dark', label='Dark Mode', value=True, trigger=True)]
)

# The footer shown on all the app's pages
footer = ui.footer_card(
    box='footer',
    caption=f'Learn more about <a href="{repo_url}" target="_blank">WaveTon: 💯 Wave Applications</a>'
)

# A fallback card for handling bugs
fallback = ui.form_card(
    box='fallback',
    items=[ui.text('Uh-oh, something went wrong!')]
)


def asr(recording: bool = False, audio_path: str = None, transcription: str = '') -> ui.FormCard:
    """
    Card for Automatic Speech Recognition.
    """

    button_name = 'stop' if recording else 'start'
    button_label = '⏹️ Stop Recording' if recording else '🎙️ Start Recording'
    visible = audio_path is not None

    card = ui.form_card(
        box='main',
        items=[
            ui.separator(label='Microphone'),
            ui.buttons(items=[ui.button(name=button_name, label=button_label, primary=True)], justify='center'),
            ui.progress(label='Recording...', caption='', visible=recording),
            ui.separator(label='Audio', visible=visible),
            ui.text(
                content=f'''<center>
                    <audio controls><source src="{audio_path}" type="audio/wav"></audio>
                </center>''',
                visible=visible
            ),
            ui.separator(label='Transcription', visible=visible),
            ui.textbox(name='transcription', value=transcription, multiline=True, visible=visible)
        ]
    )

    return card


def crash_report(q: Q) -> ui.FormCard:
    """
    Card for capturing the stack trace and current application state, for error reporting.
    This function is called by the main serve() loop on uncaught exceptions.
    """

    def code_block(content):
        return '\n'.join(['```', *content, '```'])

    type_, value_, traceback_ = sys.exc_info()
    stack_trace = traceback.format_exception(type_, value_, traceback_)

    dump = [
        '### Stack Trace',
        code_block(stack_trace)
    ]

    states = [
        ('q.app', q.app),
        ('q.user', q.user),
        ('q.client', q.client),
        ('q.events', q.events),
        ('q.args', q.args)
    ]
    for name, source in states:
        dump.append(f'### {name}')
        dump.append(code_block([f'{k}: {v}' for k, v in expando_to_dict(source).items()]))

    return ui.form_card(
        box='main',
        items=[
            ui.stats(
                items=[
                    ui.stat(
                        label='',
                        value='Oops!',
                        caption='Something went wrong',
                        icon='Error'
                    )
                ]
            ),
            ui.separator(),
            ui.text_l(content='Apologies for the inconvenience!'),
            ui.buttons(items=[ui.button(name='reload', label='Reload', primary=True)]),
            ui.expander(name='report', label='Error Details', items=[
                ui.text(
                    f'To report this issue, <a href="{issue_url}" target="_blank">please open an issue</a> '
                    'with the details below:'
                ),
                ui.text_l(content=f'Report Issue in App: **{app_name}**'),
                ui.text(content='\n'.join(dump))
            ])
        ]
    )
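
The three states of the asr() card map directly to the calls app.py makes: no arguments when idle, recording=True while capturing, and audio_path/transcription after inference. A sketch (the audio path shown is a hypothetical Wave upload URL):

```python
import cards

idle = cards.asr()                    # start button only
busy = cards.asr(recording=True)      # stop button and progress bar
done = cards.asr(
    audio_path='/_f/0000/sound.wav',  # hypothetical uploaded-file URL
    transcription='hello world'       # audio player and transcription box become visible
)
```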
record.js
ADDED
@@ -0,0 +1,130 @@
// Shim for Safari.
window.AudioContext = window.AudioContext || window.webkitAudioContext

function audioBufferToWav(buffer, opt) {
  opt = opt || {}
  var numChannels = buffer.numberOfChannels
  var sampleRate = buffer.sampleRate
  var format = opt.float32 ? 3 : 1
  var bitDepth = format === 3 ? 32 : 16
  var result
  if (numChannels === 2) {
    result = interleave(buffer.getChannelData(0), buffer.getChannelData(1))
  } else {
    result = buffer.getChannelData(0)
  }
  return encodeWAV(result, format, sampleRate, numChannels, bitDepth)
}

function encodeWAV(samples, format, sampleRate, numChannels, bitDepth) {
  var bytesPerSample = bitDepth / 8
  var blockAlign = numChannels * bytesPerSample
  var buffer = new ArrayBuffer(44 + samples.length * bytesPerSample)
  var view = new DataView(buffer)
  /* RIFF identifier */
  writeString(view, 0, 'RIFF')
  /* RIFF chunk length */
  view.setUint32(4, 36 + samples.length * bytesPerSample, true)
  /* RIFF type */
  writeString(view, 8, 'WAVE')
  /* format chunk identifier */
  writeString(view, 12, 'fmt ')
  /* format chunk length */
  view.setUint32(16, 16, true)
  /* sample format (raw) */
  view.setUint16(20, format, true)
  /* channel count */
  view.setUint16(22, numChannels, true)
  /* sample rate */
  view.setUint32(24, sampleRate, true)
  /* byte rate (sample rate * block align) */
  view.setUint32(28, sampleRate * blockAlign, true)
  /* block align (channel count * bytes per sample) */
  view.setUint16(32, blockAlign, true)
  /* bits per sample */
  view.setUint16(34, bitDepth, true)
  /* data chunk identifier */
  writeString(view, 36, 'data')
  /* data chunk length */
  view.setUint32(40, samples.length * bytesPerSample, true)
  if (format === 1) { // Raw PCM
    floatTo16BitPCM(view, 44, samples)
  } else {
    writeFloat32(view, 44, samples)
  }
  return buffer
}

function interleave(inputL, inputR) {
  var length = inputL.length + inputR.length
  var result = new Float32Array(length)
  var index = 0
  var inputIndex = 0
  while (index < length) {
    result[index++] = inputL[inputIndex]
    result[index++] = inputR[inputIndex]
    inputIndex++
  }
  return result
}

function writeFloat32(output, offset, input) {
  for (var i = 0; i < input.length; i++, offset += 4) {
    output.setFloat32(offset, input[i], true)
  }
}

function floatTo16BitPCM(output, offset, input) {
  for (var i = 0; i < input.length; i++, offset += 2) {
    var s = Math.max(-1, Math.min(1, input[i]))
    output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true)
  }
}

function writeString(view, offset, string) {
  for (var i = 0; i < string.length; i++) {
    view.setUint8(offset + i, string.charCodeAt(i))
  }
}

// Safari does not support promise-based decodeAudioData, need to use callback instead.
const decodeAudioData = buffer => new Promise((res, rej) => {
  new AudioContext().decodeAudioData(buffer, res, rej)
})

const startRecording = async () => {
  const data = []
  // Ask for mic permissions.
  const stream = await navigator.mediaDevices.getUserMedia({ video: false, audio: true })
  window.stream = stream
  // Use polyfill for older browsers.
  if (!window.MediaRecorder) {
    window.MediaRecorder = OpusMediaRecorder
    window.recorder = new MediaRecorder(stream, {}, {
      OggOpusEncoderWasmPath: 'https://cdn.jsdelivr.net/npm/opus-media-recorder@latest/OggOpusEncoder.wasm',
      WebMOpusEncoderWasmPath: 'https://cdn.jsdelivr.net/npm/opus-media-recorder@latest/WebMOpusEncoder.wasm'
    })
  }
  else window.recorder = new MediaRecorder(stream)
  // Handle incoming data.
  window.recorder.ondataavailable = e => data.push(e.data)
  window.recorder.start()
  window.recorder.onerror = e => { throw e.error || new Error(e.name) }
  window.recorder.onstop = async (e) => {
    const blob = new Blob(data)
    const fetchedBlob = await fetch(URL.createObjectURL(blob))
    const arrayBuffer = await fetchedBlob.arrayBuffer()
    // Convert to wav format.
    const wav = audioBufferToWav(await decodeAudioData(arrayBuffer))
    const formData = new FormData()
    formData.append('files', new Blob([wav], { type: 'audio/wave' }), 'sound.wav')
    // Send the audio file to Wave server.
    const res = await fetch(wave.uploadURL, { method: 'POST', body: formData })
    const { files } = await res.json()
    // Emit event (q.events.audio.captured) with a URL of the audio file at Wave server.
    window.wave.emit('audio', 'captured', files[0])
  }
}

const stopRecording = () => {
  window.recorder.stop()
  window.stream.getTracks().forEach(track => track.stop())
}
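
The header that encodeWAV() writes is the standard 44-byte RIFF/WAVE header followed by 16-bit PCM samples. The same layout in Python, as a sketch assuming mono float samples in [-1, 1]:

```python
# Sketch mirroring encodeWAV() in record.js: a 44-byte RIFF/WAVE header
# plus 16-bit PCM data, assuming mono float samples in [-1, 1].
import struct

def encode_wav(samples, sample_rate=48000, num_channels=1):
    bytes_per_sample = 2                             # 16-bit PCM
    block_align = num_channels * bytes_per_sample
    data = b''.join(
        struct.pack('<h', int(max(-1.0, min(1.0, s)) * (0x8000 if s < 0 else 0x7FFF)))
        for s in samples
    )
    return (
        b'RIFF' + struct.pack('<I', 36 + len(data)) + b'WAVE' +   # RIFF chunk
        b'fmt ' + struct.pack('<IHHIIHH', 16, 1, num_channels,    # fmt chunk (PCM)
                              sample_rate, sample_rate * block_align,
                              block_align, 16) +
        b'data' + struct.pack('<I', len(data)) +                  # data chunk
        data
    )
```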
requirements.txt
ADDED
@@ -0,0 +1,2 @@
h2o_wave==0.23.1
git+https://github.com/openai/whisper.git
utils.py
ADDED
@@ -0,0 +1,9 @@
from h2o_wave import ui


def get_inline_script(text: str) -> ui.InlineScript:
    """
    Get Wave's Inline Script.
    """

    return ui.inline_script(text)