Update audio2text/a2t.py
Browse files- audio2text/a2t.py +9 -43
audio2text/a2t.py
CHANGED
@@ -4,64 +4,30 @@ import librosa
|
|
4 |
import torch
|
5 |
from .init import pipe
|
6 |
|
7 |
-
# Longest stretch of audio, in seconds, that preprocessing keeps.
LIMIT = 90

# Task name forwarded to the pipeline through ``generate_kwargs``.
TASK = "transcribe"
|
9 |
|
10 |
class A2T:
|
11 |
def __init__(self, mic):
|
12 |
self.mic = mic
|
13 |
|
14 |
-
def __preprocces(self, audio, frame_rate):
|
15 |
-
try:
|
16 |
-
print("Audio before : ", audio)
|
17 |
-
audio = audio / 32678.0
|
18 |
-
print("Audio div : ", audio)
|
19 |
-
|
20 |
-
if len(audio.shape) > 1:
|
21 |
-
audio = librosa.to_mono(audio.T)
|
22 |
-
|
23 |
-
print("Audio mono : ", audio)
|
24 |
-
|
25 |
-
if frame_rate != 16_000:
|
26 |
-
audio = librosa.resample(audio, orig_sr=frame_rate, target_sr=16000)
|
27 |
-
|
28 |
-
print("Audio resample : ", audio)
|
29 |
-
|
30 |
-
audio = audio[:16_000*LIMIT]
|
31 |
-
|
32 |
-
print("Audio cut : ", audio)
|
33 |
-
|
34 |
-
audio = torch.tensor(audio)
|
35 |
-
|
36 |
-
print("Audio torch : ", audio)
|
37 |
-
return audio
|
38 |
-
except Exception as e:
|
39 |
-
print("Preprocces error", e)
|
40 |
-
return None
|
41 |
-
|
42 |
def __transcribe(self, inputs, task: str = None):
|
43 |
if inputs is None:
|
44 |
print("Inputs None")
|
45 |
|
46 |
transcribed_text = pipe(inputs, generate_kwargs={"task": task}, return_timestamps=True)["text"]
|
|
|
47 |
return transcribed_text
|
48 |
-
|
49 |
|
50 |
def predict(self):
|
51 |
-
if self.mic is not None:
|
52 |
-
chunk = self.mic.get_array_of_samples()
|
53 |
-
audio = np.array(chunk)
|
54 |
-
# frame_rate = self.mic.frame_rate
|
55 |
-
else:
|
56 |
-
return "please provide audio"
|
57 |
-
|
58 |
try:
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
65 |
except Exception as e:
|
66 |
print("Predict error", e)
|
67 |
return "Oops some kinda error"
|
|
|
4 |
import torch
|
5 |
from .init import pipe
|
6 |
|
|
|
7 |
# Task name forwarded to the pipeline through ``generate_kwargs``.
TASK = "transcribe"
|
8 |
|
9 |
class A2T:
    """Audio-to-text helper that transcribes one recorded clip via ``pipe``."""

    def __init__(self, mic):
        """Remember the audio source that ``predict`` will read from.

        Args:
            mic: Object exposing ``get_array_of_samples()``, or ``None``
                when no audio was supplied.
        """
        self.mic = mic

    def __transcribe(self, inputs, task: str = None):
        """Run the shared ``pipe`` on ``inputs`` and return the recognised text.

        Args:
            inputs: Audio data handed to ``pipe`` unchanged.
            task: Value forwarded as ``generate_kwargs["task"]``.

        Returns:
            The ``"text"`` entry of the pipeline result.

        Raises:
            ValueError: If ``inputs`` is ``None``.
        """
        if inputs is None:
            print("Inputs None")
            # Previously execution fell through and called the pipeline
            # with None; fail fast with an explicit error instead.
            raise ValueError("inputs must not be None")

        transcribed_text = pipe(
            inputs,
            generate_kwargs={"task": task},
            return_timestamps=True,
        )["text"]
        print(transcribed_text)
        return transcribed_text

    def predict(self):
        """Transcribe the stored clip.

        Returns:
            The transcription, ``"please provide audio"`` when no clip was
            given, or ``"Oops some kinda error"`` if anything fails while
            reading or transcribing it (the error is printed).
        """
        # Guard clause: nothing to transcribe.
        if self.mic is None:
            return "please provide audio"

        try:
            samples = np.array(self.mic.get_array_of_samples())
            print(samples)
            # NOTE(review): the samples keep their original integer dtype and
            # no sampling rate is passed along. If `pipe` is a Hugging Face
            # ASR pipeline it documents float arrays at the model's sampling
            # rate -- confirm that `pipe` copes with this input.
            return self.__transcribe(inputs=samples, task=TASK)
        except Exception as e:
            # Top-level boundary: report and return a user-facing message.
            print("Predict error", e)
            return "Oops some kinda error"