MikeTangoEcho committed on
Commit
79c8857
1 Parent(s): 11efa99

feat: update app

Browse files
Files changed (1) hide show
  1. app.py +47 -9
app.py CHANGED
@@ -47,41 +47,56 @@ tc = pipeline(
47
  # - or a bytes object (recommended for streaming),
48
  # - or a tuple of (sample rate in Hz, audio data as numpy array)
49
  def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
 
 
50
  if audio is None:
51
  return "..."
52
  # TODO Manage str/Path
53
 
54
- logger.debug("====> Transcribe")
55
-
56
  text = ""
 
57
  # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
58
  # Whisper input format for tuple differ from output provided by gradio audio component
59
- if asr_model.startswith("openai/whisper"):
60
  sampling_rate, raw = audio
61
 
62
  # Convert to mono if stereo
63
  if raw.ndim > 1:
64
  raw = raw.mean(axis=1)
65
 
66
- raw = raw.astype(np.float32) # Convert to asr_torch_dtype
 
67
  raw /= np.max(np.abs(raw))
68
 
69
- inputs = {"sampling_rate": sampling_rate, "raw": raw} # if type(audio) is tuple else audio
70
 
71
  logger.debug(inputs)
72
 
73
  transcript = asr(inputs)
74
  text = transcript['text']
75
 
76
- logger.debug("====> Tokenize:[" + text + "]")
 
 
 
 
 
77
 
78
  entities = tc(text)
79
 
80
- #logger.debug("Classify:[" + entities + "]")
81
 
82
  # TODO Add Text Classification for sentiment analysis
83
  return {"text": text, "entities": entities}
84
 
 
 
 
 
 
 
 
 
85
  # ---
86
 
87
  # Gradio
@@ -96,8 +111,8 @@ input_audio = gr.Audio(
96
 
97
  ## App
98
 
99
- gradio_app = gr.Interface(
100
- transcribe,
101
  inputs=[
102
  input_audio
103
  ],
@@ -111,5 +126,28 @@ gradio_app = gr.Interface(
111
  flagging_mode="never"
112
  )
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  ## Start!
115
  gradio_app.launch()
 
47
  # - or a bytes object (recommended for streaming),
48
  # - or a tuple of (sample rate in Hz, audio data as numpy array)
49
def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
    """Transcribe an audio input to text using the module-level ASR pipeline.

    Args:
        audio: Input from the gradio audio component. May be a file path
            (str/Path), raw bytes, a ``(sample_rate_hz, samples)`` tuple,
            or ``None`` when no audio was provided.

    Returns:
        The transcribed text, ``"..."`` when no audio was provided, or ""
        when the input shape is not handled (non-whisper model / non-tuple
        input).
    """
    logger.debug(">Transcribe")

    if audio is None:
        return "..."
    # TODO Manage str/Path

    text = ""

    # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
    # Whisper input format for tuple differ from output provided by gradio audio component
    if asr_model.startswith("openai/whisper") and isinstance(audio, tuple):
        sampling_rate, raw = audio

        # Convert to mono if stereo
        if raw.ndim > 1:
            raw = raw.mean(axis=1)

        # Convert according to asr_torch_dtype.
        # BUG FIX: the original tested `type(asr_torch_dtype) is torch.float16`,
        # which is always False because torch.float16 is a torch.dtype
        # *instance*, not a type — the float16 branch was dead. Compare the
        # dtype value directly.
        raw = raw.astype(np.float16 if asr_torch_dtype == torch.float16 else np.float32)

        # Normalize to [-1, 1]; guard against all-zero (silent) input which
        # would otherwise divide by zero and produce NaNs.
        peak = np.max(np.abs(raw))
        if peak > 0:
            raw /= peak

        inputs = {"sampling_rate": sampling_rate, "raw": raw}

        logger.debug(inputs)

        transcript = asr(inputs)
        text = transcript['text']

    logger.debug(text)

    return text
81
+
82
def tokenize(text: str):
    """Run token classification (NER) over *text*.

    Returns:
        A dict with the original text and the recognized entities, in the
        format expected by ``gr.HighlightedText``.
    """
    logger.debug(">Tokenize")

    recognized = tc(text)
    logger.debug(recognized)

    # TODO Add Text Classification for sentiment analysis
    return {"text": text, "entities": recognized}
91
 
92
def classify(text: str):
    """Placeholder for text classification (e.g. sentiment analysis).

    Currently a stub: it only logs the call and returns ``None``.
    """
    logger.debug(">Classify")
    return None
96
+
97
def transcribe_tokenize(*arg):
    """Gradio entry point: transcribe the audio input, then tag entities.

    BUG FIX: the original called ``transcribe(arg)``, forwarding the whole
    varargs *tuple* — wrapping the gradio ``(sample_rate, data)`` audio
    tuple in a 1-tuple and breaking the ``sampling_rate, raw = audio``
    unpacking inside transcribe(). Unpack the positional args instead.
    """
    return tokenize(transcribe(*arg))
99
+
100
  # ---
101
 
102
  # Gradio
 
111
 
112
  ## App
113
 
114
+ asrner_app = gr.Interface(
115
+ transcribe_tokenize,
116
  inputs=[
117
  input_audio
118
  ],
 
126
  flagging_mode="never"
127
  )
128
 
129
# NER-only interface: paste text, see the recognized entities highlighted.
ner_app = gr.Interface(
    tokenize,
    inputs=[gr.Textbox()],
    outputs=[gr.HighlightedText()],
    title="NERSBX",
    description="Tokenize, Classify",
    flagging_mode="never",
)
143
+
144
# Combine the ASR+NER app and the NER-only app into a single tabbed UI.
gradio_app = gr.TabbedInterface(
    interface_list=[asrner_app, ner_app],
    title="ASRNERSBX",
)
151
+
152
  ## Start!
153
  gradio_app.launch()