import gradio as gr
import numpy as np
import torch
import transformers
from pathlib import Path
from transformers import pipeline
from transformers.utils import logging
# Log
#logging.set_verbosity_debug()
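# Note: transformers' default verbosity is WARNING, so the logger.debug calls
# below are only emitted if the line above is uncommented.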
logger = logging.get_logger("transformers")
# Pipelines
## Automatic Speech Recognition
## https://huggingface.co/docs/transformers/task_summary#automatic-speech-recognition
## Requires ffmpeg to be installed
asr_device = "cuda:0" if torch.cuda.is_available() else "cpu"
asr_torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
asr_model = "openai/whisper-tiny"
asr = pipeline(
    "automatic-speech-recognition",
    model=asr_model,
    torch_dtype=asr_torch_dtype,
    device=asr_device
)
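## Optional sketch (assumption about expected input length, not part of the original
## setup): for recordings longer than Whisper's 30 s window, the ASR pipeline
## supports chunked long-form inference via chunk_length_s, e.g.
## asr = pipeline("automatic-speech-recognition", model=asr_model,
##                torch_dtype=asr_torch_dtype, device=asr_device, chunk_length_s=30)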
## Token Classification / Named Entity Recognition
## https://huggingface.co/docs/transformers/task_summary#token-classification
tc_device = 0 if torch.cuda.is_available() else "cpu"
tc_model = "dslim/distilbert-NER"
tc = pipeline(
    "token-classification",  # ner
    model=tc_model,
    device=tc_device
)
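## Optional sketch (assumption about the desired granularity): the token-classification
## pipeline can merge word pieces into whole-word entities with
## aggregation_strategy="simple"; note this changes the output key from "entity"
## to "entity_group", e.g.
## tc = pipeline("token-classification", model=tc_model, device=tc_device,
##               aggregation_strategy="simple")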
# ---
# Transformers
# https://www.gradio.app/main/docs/gradio/audio#behavior
# The Gradio Audio component (see #behavior in the docs above) provides audio
# in any of these formats:
# - a str or pathlib.Path filepath,
# - or a URL to an audio file,
# - or a bytes object (recommended for streaming),
# - or a tuple of (sample rate in Hz, audio data as numpy array)
def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
    logger.debug(">Transcribe")
    if audio is None:
        return "..."
    # TODO Manage str/Path
    text = ""
    # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
    # Whisper's input format for a tuple differs from the output provided by the Gradio Audio component
    if asr_model.startswith("openai/whisper") and isinstance(audio, tuple):
        sampling_rate, raw = audio
        # Convert to mono if stereo
        if raw.ndim > 1:
            raw = raw.mean(axis=1)
        # Convert according to asr_torch_dtype
        raw = raw.astype(np.float16 if asr_torch_dtype == torch.float16 else np.float32)
        # Normalize to [-1, 1], guarding against an all-zero (silent) signal
        peak = np.max(np.abs(raw))
        if peak > 0:
            raw /= peak
        inputs = {"sampling_rate": sampling_rate, "raw": raw}
        logger.debug(inputs)
        transcript = asr(inputs)
        text = transcript["text"]
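    # Hedged sketch for the str/Path TODO above (an assumption, not part of the
    # original flow): the ASR pipeline also accepts a local filepath or URL as a
    # plain string (ffmpeg required), which covers the Gradio filepath format.
    elif isinstance(audio, (str, Path)):
        transcript = asr(str(audio))
        text = transcript["text"]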
    logger.debug(text)
    return text
def tokenize(text: str):
    logger.debug(">Tokenize")
    entities = tc(text)
    logger.debug(entities)
    # TODO Add Text Classification for sentiment analysis
    return {"text": text, "entities": entities}
def classify(text: str):
    logger.debug(">Classify")
    return None
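# Hedged sketch for the sentiment-analysis TODO above (an assumption, left commented
# out so it does not load an extra model at startup): a text-classification pipeline
# with its default English sentiment checkpoint could back classify(), e.g.
# sa = pipeline("text-classification", device=tc_device)
# def classify(text: str):
#     logger.debug(">Classify")
#     return sa(text)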
def transcribe_tokenize(*args):
    return tokenize(transcribe(*args))
# ---
# Gradio
## Interfaces
# https://www.gradio.app/main/docs/gradio/audio
input_audio = gr.Audio(
    sources=["upload", "microphone"],
    show_share_button=False
)
## App
asrner_app = gr.Interface(
    transcribe_tokenize,
    inputs=[
        input_audio
    ],
    outputs=[
        gr.HighlightedText()
    ],
    title="ASR>NER",
    description=(
        "Transcribe, Tokenize, Classify"
    ),
    flagging_mode="never"
)
ner_app = gr.Interface(
    tokenize,
    inputs=[
        gr.Textbox()
    ],
    outputs=[
        gr.HighlightedText()
    ],
    title="NER",
    description=(
        "Tokenize, Classify"
    ),
    flagging_mode="never"
)
gradio_app = gr.TabbedInterface(
    interface_list=[
        asrner_app,
        ner_app
    ],
    tab_names=[
        asrner_app.title,
        ner_app.title
    ],
    title="ASRNERSBX"
)
## Start!
gradio_app.launch()