feat: Init.

- Dockerfile +18 -0
- README.md +74 -6
- app.py +207 -0
- cards.py +145 -0
- record.js +130 -0
- requirements.txt +2 -0
- utils.py +9 -0
Dockerfile
ADDED
@@ -0,0 +1,18 @@
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
RUN apt-get update && apt-get install -y ffmpeg

COPY . .

ENV H2O_WAVE_LISTEN=":7860"
ENV H2O_WAVE_ADDRESS='http://127.0.0.1:7860'

CMD ["wave", "run", "app", "--no-reload"]
README.md
CHANGED
@@ -1,11 +1,79 @@
 ---
-title:
-emoji:
+title: H2O Wave NER Annotation
+emoji: 📝
 colorFrom: yellow
-colorTo:
+colorTo: gray
 sdk: docker
-
-license: apache-2.0
+app_port: 7860
 ---
 
-
+<div align='center'>
+
+<h1>WaveTon</h1>
+💯 Wave applications
+
+<br>
+<br>
+
+[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg?logo=apache)](https://github.com/vopani/waveton/blob/master/LICENSE)
+[![GitHub](https://img.shields.io/github/stars/vopani/waveton?color=yellowgreen&logo=github)](https://img.shields.io/github/stars/vopani/waveton?color=yellowgreen&logo=github)
+[![Twitter](https://img.shields.io/twitter/follow/vopani)](https://twitter.com/vopani)
+
+</div>
+
+## Whisper 🖥️
+
+Speech to text using OpenAI's Whisper model.
+
+![](demo.gif)
+
+## Setup ⚙️
+
+1. Check the version of Python: 3.9+ is required, and 3.10+ is recommended for the best experience
+
+```commandline
+python3 --version
+```
+
+2. Clone the repository
+
+```commandline
+git clone https://github.com/vopani/waveton.git
+```
+
+3. Create a virtual environment
+
+```commandline
+cd waveton/apps/deeplearning_apps/whisper
+python3 -m venv venv
+source venv/bin/activate
+```
+
+4. Install ffmpeg
+
+On Linux:
+
+```commandline
+sudo apt update && sudo apt install ffmpeg
+```
+
+On Mac:
+
+```commandline
+brew install ffmpeg
+```
+
+5. Install the packages
+
+```commandline
+python3 -m pip install -U pip
+python3 -m pip install -r requirements.txt
+```
+
+6. Run the application
+
+```commandline
+wave run app
+```
+
+7. View the application in your local browser: [http://localhost:10101](http://localhost:10101)
app.py
ADDED
@@ -0,0 +1,207 @@
import logging

from h2o_wave import Q, main, app, copy_expando, handle_on, on
import whisper

import cards
from utils import get_inline_script

# Set up logging
logging.basicConfig(format='%(levelname)s:\t[%(asctime)s]\t%(message)s', level=logging.INFO)


@app('/')
async def serve(q: Q):
    """
    Main entry point. All queries pass through this function.
    """

    try:
        # Initialize the app if not already
        if not q.app.initialized:
            await initialize_app(q)

        # Initialize the client if not already
        if not q.client.initialized:
            await initialize_client(q)

        # Update theme if toggled
        elif q.args.theme_dark is not None and q.args.theme_dark != q.client.theme_dark:
            await update_theme(q)

        # Run inference if audio is recorded
        elif q.events.audio:
            await audio_inference(q)

        # Delegate query to query handlers
        elif await handle_on(q):
            pass

        # Fallback condition, to help identify bugs
        else:
            await handle_fallback(q)

    except Exception as error:
        await show_error(q, error=str(error))


async def initialize_app(q: Q):
    """
    Initialize the app.
    """

    logging.info('Initializing app')

    # Set initial argument values
    q.app.cards = ['main', 'error']

    q.app.model = whisper.load_model('base')

    q.app.initialized = True


async def initialize_client(q: Q):
    """
    Initialize the client (browser tab).
    """

    logging.info('Initializing client')

    # Set initial argument values
    q.client.theme_dark = True

    # Add layouts, scripts, header and footer
    q.page['meta'] = cards.meta
    q.page['header'] = cards.header
    q.page['footer'] = cards.footer

    # Add cards for the main page
    q.page['asr'] = cards.asr()

    q.client.initialized = True

    await q.page.save()


async def update_theme(q: Q):
    """
    Update the theme of the app.
    """

    # Copy argument values to the client
    copy_expando(q.args, q.client)

    if q.client.theme_dark:
        logging.info('Updating theme to dark mode')

        # Update theme from light to dark mode
        q.page['meta'].theme = 'h2o-dark'
        q.page['header'].icon_color = 'black'
    else:
        logging.info('Updating theme to light mode')

        # Update theme from dark to light mode
        q.page['meta'].theme = 'light'
        q.page['header'].icon_color = '#FEC924'

    await q.page.save()


@on('start')
async def start_recording(q: Q):
    """
    Start recording audio.
    """

    logging.info('Starting recording')

    q.page['meta'].script = get_inline_script('startRecording()')
    q.page['asr'] = cards.asr(recording=True)

    await q.page.save()


@on('stop')
async def stop_recording(q: Q):
    """
    Stop recording audio.
    """

    logging.info('Stopping recording')

    q.page['meta'].script = get_inline_script('stopRecording()')
    q.page['asr'] = cards.asr()

    await q.page.save()


@on('audio')
async def audio_inference(q: Q):
    """
    Run ASR inference on the recorded audio.
    """

    logging.info('Running inference on recorded audio')

    audio_path = await q.site.download(q.events.audio.captured, '.')

    q.client.transcription = q.app.model.transcribe(audio_path)['text']

    q.page['asr'] = cards.asr(audio_path=q.events.audio.captured, transcription=q.client.transcription)

    await q.page.save()


def clear_cards(q: Q, card_names: list):
    """
    Clear cards from the page.
    """

    logging.info('Clearing cards')

    # Delete cards from the page
    for card_name in card_names:
        del q.page[card_name]


async def show_error(q: Q, error: str):
    """
    Display errors.
    """

    logging.error(error)

    # Clear all cards
    clear_cards(q, q.app.cards)

    # Format and display the error
    q.page['error'] = cards.crash_report(q)

    await q.page.save()


@on('reload')
async def reload_client(q: Q):
    """
    Reset the client.
    """

    logging.info('Reloading client')

    # Clear all cards
    clear_cards(q, q.app.cards)

    # Reload the client
    await initialize_client(q)


async def handle_fallback(q: Q):
    """
    Handle fallback cases.
    """

    logging.info('Adding fallback page')

    q.page['fallback'] = cards.fallback

    await q.page.save()
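
For reference, the transcription step in audio_inference() is just the standard Whisper API. A minimal standalone sketch of that call, assuming a local sound.wav (hypothetical file name) and ffmpeg on the PATH:

```python
# Minimal sketch of the Whisper call that audio_inference() wraps.
# 'sound.wav' is a hypothetical local file; ffmpeg must be on the PATH.
import whisper

model = whisper.load_model('base')       # downloads the weights on first use
result = model.transcribe('sound.wav')   # returns a dict with 'text', 'segments', 'language'
print(result['text'])
```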
cards.py
ADDED
@@ -0,0 +1,145 @@
import sys
import traceback

from h2o_wave import Q, expando_to_dict, ui

# App name
app_name = 'Whisper'

# Link to repo. Report bugs/features here :)
repo_url = 'https://github.com/vopani/waveton'
issue_url = f'{repo_url}/issues/new?assignees=vopani&labels=bug&template=error-report.md&title=%5BERROR%5D'

# JS scripts
encoder_url = 'https://cdn.jsdelivr.net/npm/opus-media-recorder@latest/encoderWorker.umd.js'
recorder_url = 'https://cdn.jsdelivr.net/npm/opus-media-recorder@latest/OpusMediaRecorder.umd.js'

with open('record.js', encoding='utf-8') as f:
    recorder_script = ui.inline_script(f.read())

# A meta card to hold the app's title, layouts, dialogs, theme and other meta information
meta = ui.meta_card(
    box='',
    title='WaveTon',
    layouts=[
        ui.layout(
            breakpoint='xs',
            zones=[
                ui.zone(name='header'),
                ui.zone(name='main'),
                ui.zone(name='footer')
            ]
        )
    ],
    theme='h2o-dark',
    scripts=[
        ui.script(encoder_url, asynchronous=False),
        ui.script(recorder_url, asynchronous=False)
    ],
    script=recorder_script
)

# The header shown on all the app's pages
header = ui.header_card(
    box='header',
    title='Whisper',
    subtitle="Speech to text using OpenAI's Whisper model",
    icon='Microphone',
    icon_color='black',
    items=[ui.toggle(name='theme_dark', label='Dark Mode', value=True, trigger=True)]
)

# The footer shown on all the app's pages
footer = ui.footer_card(
    box='footer',
    caption=f'Learn more about <a href="{repo_url}" target="_blank">WaveTon: 💯 Wave Applications</a>'
)

# A fallback card for handling bugs
fallback = ui.form_card(
    box='fallback',
    items=[ui.text('Uh-oh, something went wrong!')]
)


def asr(recording: bool = False, audio_path: str = None, transcription: str = '') -> ui.FormCard:
    """
    Card for Automatic Speech Recognition.
    """

    button_name = 'stop' if recording else 'start'
    button_label = '⏹️ Stop Recording' if recording else '🎙️ Start Recording'
    visible = audio_path is not None

    card = ui.form_card(
        box='main',
        items=[
            ui.separator(label='Microphone'),
            ui.buttons(items=[ui.button(name=button_name, label=button_label, primary=True)], justify='center'),
            ui.progress(label='Recording...', caption='', visible=recording),
            ui.separator(label='Audio', visible=visible),
            ui.text(
                content=f'''<center>
                    <audio controls><source src="{audio_path}" type="audio/wav"></audio>
                </center>''',
                visible=visible
            ),
            ui.separator(label='Transcription', visible=visible),
            ui.textbox(name='transcription', value=transcription, multiline=True, visible=visible)
        ]
    )

    return card


def crash_report(q: Q) -> ui.FormCard:
    """
    Card for capturing the stack trace and current application state, for error reporting.
    This function is called by the main serve() loop on uncaught exceptions.
    """

    def code_block(content):
        return '\n'.join(['```', *content, '```'])

    type_, value_, traceback_ = sys.exc_info()
    stack_trace = traceback.format_exception(type_, value_, traceback_)

    dump = [
        '### Stack Trace',
        code_block(stack_trace)
    ]

    states = [
        ('q.app', q.app),
        ('q.user', q.user),
        ('q.client', q.client),
        ('q.events', q.events),
        ('q.args', q.args)
    ]
    for name, source in states:
        dump.append(f'### {name}')
        dump.append(code_block([f'{k}: {v}' for k, v in expando_to_dict(source).items()]))

    return ui.form_card(
        box='main',
        items=[
            ui.stats(
                items=[
                    ui.stat(
                        label='',
                        value='Oops!',
                        caption='Something went wrong',
                        icon='Error'
                    )
                ]
            ),
            ui.separator(),
            ui.text_l(content='Apologies for the inconvenience!'),
            ui.buttons(items=[ui.button(name='reload', label='Reload', primary=True)]),
            ui.expander(name='report', label='Error Details', items=[
                ui.text(
                    f'To report this issue, <a href="{issue_url}" target="_blank">please open an issue</a> '
                    'with the details below:'
                ),
                ui.text_l(content=f'Report Issue in App: **{app_name}**'),
                ui.text(content='\n'.join(dump))
            ])
        ]
    )
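
The three states of the asr() card map directly to the calls app.py makes: no arguments when idle, recording=True while capturing, and audio_path/transcription after inference. A sketch (the audio path shown is a hypothetical Wave upload URL):

```python
import cards

idle = cards.asr()                    # start button only
busy = cards.asr(recording=True)      # stop button and progress bar
done = cards.asr(
    audio_path='/_f/0000/sound.wav',  # hypothetical uploaded-file URL
    transcription='hello world'       # audio player and transcription box become visible
)
```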
record.js
ADDED
@@ -0,0 +1,130 @@
// Shim for Safari.
window.AudioContext = window.AudioContext || window.webkitAudioContext

function audioBufferToWav(buffer, opt) {
  opt = opt || {}
  var numChannels = buffer.numberOfChannels
  var sampleRate = buffer.sampleRate
  var format = opt.float32 ? 3 : 1
  var bitDepth = format === 3 ? 32 : 16
  var result
  if (numChannels === 2) {
    result = interleave(buffer.getChannelData(0), buffer.getChannelData(1))
  } else {
    result = buffer.getChannelData(0)
  }
  return encodeWAV(result, format, sampleRate, numChannels, bitDepth)
}

function encodeWAV(samples, format, sampleRate, numChannels, bitDepth) {
  var bytesPerSample = bitDepth / 8
  var blockAlign = numChannels * bytesPerSample
  var buffer = new ArrayBuffer(44 + samples.length * bytesPerSample)
  var view = new DataView(buffer)
  /* RIFF identifier */
  writeString(view, 0, 'RIFF')
  /* RIFF chunk length */
  view.setUint32(4, 36 + samples.length * bytesPerSample, true)
  /* RIFF type */
  writeString(view, 8, 'WAVE')
  /* format chunk identifier */
  writeString(view, 12, 'fmt ')
  /* format chunk length */
  view.setUint32(16, 16, true)
  /* sample format (raw) */
  view.setUint16(20, format, true)
  /* channel count */
  view.setUint16(22, numChannels, true)
  /* sample rate */
  view.setUint32(24, sampleRate, true)
  /* byte rate (sample rate * block align) */
  view.setUint32(28, sampleRate * blockAlign, true)
  /* block align (channel count * bytes per sample) */
  view.setUint16(32, blockAlign, true)
  /* bits per sample */
  view.setUint16(34, bitDepth, true)
  /* data chunk identifier */
  writeString(view, 36, 'data')
  /* data chunk length */
  view.setUint32(40, samples.length * bytesPerSample, true)
  if (format === 1) { // Raw PCM
    floatTo16BitPCM(view, 44, samples)
  } else {
    writeFloat32(view, 44, samples)
  }
  return buffer
}

function interleave(inputL, inputR) {
  var length = inputL.length + inputR.length
  var result = new Float32Array(length)
  var index = 0
  var inputIndex = 0
  while (index < length) {
    result[index++] = inputL[inputIndex]
    result[index++] = inputR[inputIndex]
    inputIndex++
  }
  return result
}

function writeFloat32(output, offset, input) {
  for (var i = 0; i < input.length; i++, offset += 4) {
    output.setFloat32(offset, input[i], true)
  }
}

function floatTo16BitPCM(output, offset, input) {
  for (var i = 0; i < input.length; i++, offset += 2) {
    var s = Math.max(-1, Math.min(1, input[i]))
    output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true)
  }
}

function writeString(view, offset, string) {
  for (var i = 0; i < string.length; i++) {
    view.setUint8(offset + i, string.charCodeAt(i))
  }
}

// Safari does not support promise-based decodeAudioData, need to use callback instead.
const decodeAudioData = buffer => new Promise((res, rej) => {
  new AudioContext().decodeAudioData(buffer, res, rej)
})

const startRecording = async () => {
  const data = []
  // Ask for mic permissions.
  const stream = await navigator.mediaDevices.getUserMedia({ video: false, audio: true })
  window.stream = stream
  // Use polyfill for older browsers.
  if (!window.MediaRecorder) {
    window.MediaRecorder = OpusMediaRecorder
    window.recorder = new MediaRecorder(stream, {}, {
      OggOpusEncoderWasmPath: 'https://cdn.jsdelivr.net/npm/opus-media-recorder@latest/OggOpusEncoder.wasm',
      WebMOpusEncoderWasmPath: 'https://cdn.jsdelivr.net/npm/opus-media-recorder@latest/WebMOpusEncoder.wasm'
    })
  }
  else window.recorder = new MediaRecorder(stream)
  // Handle incoming data.
  window.recorder.ondataavailable = e => data.push(e.data)
  window.recorder.start()
  window.recorder.onerror = e => { throw e.error || new Error(e.name) }
  window.recorder.onstop = async (e) => {
    const blob = new Blob(data)
    const fetchedBlob = await fetch(URL.createObjectURL(blob))
    const arrayBuffer = await fetchedBlob.arrayBuffer()
    // Convert to wav format.
    const wav = audioBufferToWav(await decodeAudioData(arrayBuffer))
    const formData = new FormData()
    formData.append('files', new Blob([wav], { type: 'audio/wave' }), 'sound.wav')
    // Send the audio file to Wave server.
    const res = await fetch(wave.uploadURL, { method: 'POST', body: formData })
    const { files } = await res.json()
    // Emit event (q.events.audio.captured) with a URL of the audio file at Wave server.
    window.wave.emit('audio', 'captured', files[0])
  }
}

const stopRecording = () => {
  window.recorder.stop()
  window.stream.getTracks().forEach(track => track.stop())
}
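
The header that encodeWAV() writes is the standard 44-byte RIFF/WAVE header followed by 16-bit PCM samples. The same layout in Python, as a sketch assuming mono float samples in [-1, 1]:

```python
# Sketch mirroring encodeWAV() in record.js: a 44-byte RIFF/WAVE header
# plus 16-bit PCM data, assuming mono float samples in [-1, 1].
import struct

def encode_wav(samples, sample_rate=48000, num_channels=1):
    bytes_per_sample = 2                             # 16-bit PCM
    block_align = num_channels * bytes_per_sample
    data = b''.join(
        struct.pack('<h', int(max(-1.0, min(1.0, s)) * (0x8000 if s < 0 else 0x7FFF)))
        for s in samples
    )
    return (
        b'RIFF' + struct.pack('<I', 36 + len(data)) + b'WAVE' +   # RIFF chunk
        b'fmt ' + struct.pack('<IHHIIHH', 16, 1, num_channels,    # fmt chunk (PCM)
                              sample_rate, sample_rate * block_align,
                              block_align, 16) +
        b'data' + struct.pack('<I', len(data)) +                  # data chunk
        data
    )
```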
requirements.txt
ADDED
@@ -0,0 +1,2 @@
h2o_wave==0.23.1
git+https://github.com/openai/whisper.git
utils.py
ADDED
@@ -0,0 +1,9 @@
from h2o_wave import ui


def get_inline_script(text: str) -> ui.InlineScript:
    """
    Get Wave's Inline Script.
    """

    return ui.inline_script(text)