CineAI committed on
Commit
b12e8e8
1 Parent(s): 0c43865

Update audio2text/a2t.py

Browse files
Files changed (1) hide show
  1. audio2text/a2t.py +4 -5
audio2text/a2t.py CHANGED
@@ -3,7 +3,7 @@ import numpy as np
3
  from .init import pipe
4
 
5
  TASK = "transcribe"
6
- BATCH_SIZE = 16
7
  LIMIT = 60
8
 
9
  class A2T:
@@ -14,13 +14,13 @@ class A2T:
14
  if inputs is None:
15
  print("Inputs None")
16
 
17
- transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task, "language": "english"})["text"]
18
  return transcribed_text
19
 
20
  def __preprocces(self, raw: np.ndarray, sampling_rate: int):
21
- chunk = raw.astype(np.float32) / 32768.0
22
 
23
- if sampling_rate > 16000:
24
  chunk = librosa.resample(chunk, orig_sr=sampling_rate, target_sr=16000)
25
 
26
  # chunk = chunk[:16000*LIMIT]
@@ -39,7 +39,6 @@ class A2T:
39
  raise Exception("please provide audio")
40
 
41
  if isinstance(audio , np.ndarray):
42
- # inputs = {"sampling_rate": 16000, "raw": audio}
43
  return self.__transcribe(inputs=audio, task=TASK)
44
  else:
45
  raise Exception("Audio is not np array")
 
3
  from .init import pipe
4
 
5
  TASK = "transcribe"
6
+ BATCH_SIZE = 8
7
  LIMIT = 60
8
 
9
  class A2T:
 
14
  if inputs is None:
15
  print("Inputs None")
16
 
17
+ transcribed_text = pipe(inputs, batch_size=BATCH_SIZE,)["text"]
18
  return transcribed_text
19
 
20
  def __preprocces(self, raw: np.ndarray, sampling_rate: int):
21
+ chunk = raw.astype(np.float32) / 32678.0
22
 
23
+ if sampling_rate != 16000:
24
  chunk = librosa.resample(chunk, orig_sr=sampling_rate, target_sr=16000)
25
 
26
  # chunk = chunk[:16000*LIMIT]
 
39
  raise Exception("please provide audio")
40
 
41
  if isinstance(audio , np.ndarray):
 
42
  return self.__transcribe(inputs=audio, task=TASK)
43
  else:
44
  raise Exception("Audio is not np array")