CineAI committed on
Commit
95bbb32
·
verified ·
1 Parent(s): 3c14b77

Update audio2text/a2t.py

Browse files
Files changed (1) hide show
  1. audio2text/a2t.py +9 -43
audio2text/a2t.py CHANGED
@@ -4,64 +4,30 @@ import librosa
4
  import torch
5
  from .init import pipe
6
 
7
- LIMIT = 90 # limit 90 seconds
8
  TASK = "transcribe"
9
 
10
  class A2T:
11
  def __init__(self, mic):
12
  self.mic = mic
13
 
14
- def __preprocces(self, audio, frame_rate):
15
- try:
16
- print("Audio before : ", audio)
17
- audio = audio / 32678.0
18
- print("Audio div : ", audio)
19
-
20
- if len(audio.shape) > 1:
21
- audio = librosa.to_mono(audio.T)
22
-
23
- print("Audio mono : ", audio)
24
-
25
- if frame_rate != 16_000:
26
- audio = librosa.resample(audio, orig_sr=frame_rate, target_sr=16000)
27
-
28
- print("Audio resample : ", audio)
29
-
30
- audio = audio[:16_000*LIMIT]
31
-
32
- print("Audio cut : ", audio)
33
-
34
- audio = torch.tensor(audio)
35
-
36
- print("Audio torch : ", audio)
37
- return audio
38
- except Exception as e:
39
- print("Preprocces error", e)
40
- return None
41
-
42
  def __transcribe(self, inputs, task: str = None):
43
  if inputs is None:
44
  print("Inputs None")
45
 
46
  transcribed_text = pipe(inputs, generate_kwargs={"task": task}, return_timestamps=True)["text"]
 
47
  return transcribed_text
48
-
49
 
50
  def predict(self):
51
- if self.mic is not None:
52
- chunk = self.mic.get_array_of_samples()
53
- audio = np.array(chunk)
54
- # frame_rate = self.mic.frame_rate
55
- else:
56
- return "please provide audio"
57
-
58
  try:
59
- # forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
60
- # # audio = self.__preprocces(audio=audio, frame_rate=frame_rate)
61
- # inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt")
62
- # predicted_ids = model.generate(**inputs, max_length=400, forced_decoder_ids=forced_decoder_ids)
63
- # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
64
- return self.__transcribe(inputs=audio, task=TASK)
 
 
65
  except Exception as e:
66
  print("Predict error", e)
67
  return "Oops some kinda error"
 
4
  import torch
5
  from .init import pipe
6
 
 
7
  TASK = "transcribe"
8
 
9
  class A2T:
10
  def __init__(self, mic):
11
  self.mic = mic
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def __transcribe(self, inputs, task: str = None):
14
  if inputs is None:
15
  print("Inputs None")
16
 
17
  transcribed_text = pipe(inputs, generate_kwargs={"task": task}, return_timestamps=True)["text"]
18
+ print(transcribed_text)
19
  return transcribed_text
 
20
 
21
  def predict(self):
 
 
 
 
 
 
 
22
  try:
23
+ if self.mic is not None:
24
+ chunk = self.mic.get_array_of_samples()
25
+ audio = np.array(chunk)
26
+ print(audio)
27
+ return self.__transcribe(inputs=audio, task=TASK)
28
+ else:
29
+ return "please provide audio"
30
+
31
  except Exception as e:
32
  print("Predict error", e)
33
  return "Oops some kinda error"