Update audio2text/a2t.py
Browse files- audio2text/a2t.py +10 -2
audio2text/a2t.py
CHANGED
@@ -14,16 +14,25 @@ class A2T:
|
|
14 |
def __preprocces(self, audio, frame_rate):
    """Normalise raw audio samples and convert them to a tensor.

    Steps, in order: scale the samples, mix down to mono when the array
    has more than one dimension, resample to 16 kHz when ``frame_rate``
    differs, keep at most ``16_000 * LIMIT`` samples, and wrap the result
    in a ``torch.Tensor``.

    Args:
        audio: Sample array exposing ``.shape`` and supporting division
            (presumably a NumPy array of 16-bit PCM values -- confirm
            against the caller).
        frame_rate: Sample rate of ``audio`` in Hz.

    Returns:
        The processed tensor. When any step raises, the error is printed
        and, as far as the visible code shows, the method falls through
        without an explicit return value.
    """
    target_sr = 16_000
    # NOTE(review): the divisor was previously 32678.0, which looks like a
    # digit transposition of 32768 (2**15), the full-scale magnitude of
    # signed 16-bit samples. Assumes int16 input -- confirm.
    int16_scale = 32768.0
    try:
        audio = audio / int16_scale

        # More than one dimension is treated as multi-channel audio.
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.T)

        if frame_rate != target_sr:
            audio = librosa.resample(
                audio, orig_sr=frame_rate, target_sr=target_sr
            )

        # LIMIT is a module-level value; it caps the clip length in seconds.
        audio = audio[:target_sr * LIMIT]

        audio = torch.tensor(audio)
        return audio
    except Exception as e:
        print("Preprocces error", e)
|
@@ -31,8 +40,7 @@ class A2T:
|
|
31 |
|
32 |
def predict(self):
|
33 |
if self.mic is not None:
|
34 |
-
audio = self.mic
|
35 |
-
audio = np.array(audio)
|
36 |
frame_rate = self.mic.frame_rate
|
37 |
else:
|
38 |
return "please provide audio"
|
|
|
14 |
def __preprocces(self, audio, frame_rate):
    """Normalise raw audio samples and convert them to a tensor.

    Steps, in order: scale the samples, mix down to mono when the array
    has more than one dimension, resample to 16 kHz when ``frame_rate``
    differs, keep at most ``16_000 * LIMIT`` samples, and wrap the result
    in a ``torch.Tensor``. The intermediate value is printed after every
    step as a debugging aid.

    Args:
        audio: Sample array exposing ``.shape`` and supporting division
            (presumably a NumPy array of 16-bit PCM values -- confirm
            against the caller).
        frame_rate: Sample rate of ``audio`` in Hz.

    Returns:
        The processed tensor. When any step raises, the error is printed
        and, as far as the visible code shows, the method falls through
        without an explicit return value.
    """
    target_sr = 16_000
    # NOTE(review): the divisor was previously 32678.0, which looks like a
    # digit transposition of 32768 (2**15), the full-scale magnitude of
    # signed 16-bit samples. Assumes int16 input -- confirm.
    int16_scale = 32768.0
    try:
        audio = audio / int16_scale
        print("Audio div : ", audio)

        # More than one dimension is treated as multi-channel audio.
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.T)

        print("Audio mono : ", audio)

        if frame_rate != target_sr:
            audio = librosa.resample(
                audio, orig_sr=frame_rate, target_sr=target_sr
            )

        print("Audio resample : ", audio)

        # LIMIT is a module-level value; it caps the clip length in seconds.
        audio = audio[:target_sr * LIMIT]

        print("Audio cut : ", audio)

        audio = torch.tensor(audio)

        print("Audio torch : ", audio)
        return audio
    except Exception as e:
        print("Preprocces error", e)
|
|
|
40 |
|
41 |
def predict(self):
|
42 |
if self.mic is not None:
|
43 |
+
audio = self.mic
|
|
|
44 |
frame_rate = self.mic.frame_rate
|
45 |
else:
|
46 |
return "please provide audio"
|