Update audio2text/a2t.py
Browse files- audio2text/a2t.py +5 -5
audio2text/a2t.py
CHANGED
@@ -9,14 +9,14 @@ class A2T:
|
|
9 |
def __init__(self, mic):
|
10 |
# Keep the caller-supplied audio on the instance; the visible code later reads self.mic and hands it to __preprocces as raw data.
self.mic = mic
|
11 |
|
12 |
-
def
|
13 |
if inputs is None:
|
14 |
-
|
15 |
|
16 |
transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
|
17 |
return transcribed_text
|
18 |
|
19 |
-
def __preprocces(self, raw):
|
20 |
print(f"Raw type : {type(raw)}")
|
21 |
chunk = io.BytesIO(raw)
|
22 |
audio, sample_rate = librosa.load(chunk, sr=16000)
|
@@ -28,12 +28,12 @@ class A2T:
|
|
28 |
if self.mic is not None:
|
29 |
raw = self.mic
|
30 |
audio = self.__preprocces(raw=raw)
|
31 |
-
print(f"audio type : {type(audio)} \n shape : {audio.shape}")
|
32 |
else:
|
33 |
raise Exception("please provide audio")
|
34 |
|
35 |
if isinstance(audio , np.ndarray):
|
36 |
-
return self.
|
37 |
else:
|
38 |
raise Exception("Audio is not np array")
|
39 |
|
|
|
9 |
def __init__(self, mic):
|
10 |
# Keep the caller-supplied audio on the instance; the visible code later reads self.mic and hands it to __preprocces as raw data.
self.mic = mic
|
11 |
|
12 |
+
def __generate_text(self, inputs, task: str = None):
    """Transcribe ``inputs`` with ``pipe`` and return the resulting text.

    ``pipe`` and ``BATCH_SIZE`` are names defined outside this method.

    Args:
        inputs: Audio handed to ``pipe``; must not be ``None``.
        task: Forwarded to ``pipe`` as ``generate_kwargs["task"]``.

    Returns:
        The ``"text"`` entry of the value returned by ``pipe``.

    Raises:
        ValueError: If ``inputs`` is ``None``.
    """
    if inputs is None:
        # ValueError subclasses Exception, so callers that catch Exception
        # keep working while new callers can catch the narrower type.
        raise ValueError("Inputs is None")

    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]
|
18 |
|
19 |
+
def __preprocces(self, raw: bytes):
|
20 |
print(f"Raw type : {type(raw)}")
|
21 |
chunk = io.BytesIO(raw)
|
22 |
audio, sample_rate = librosa.load(chunk, sr=16000)
|
|
|
28 |
if self.mic is not None:
|
29 |
raw = self.mic
|
30 |
audio = self.__preprocces(raw=raw)
|
31 |
+
print(f"audio type : {type(audio)} \n shape : {audio.shape} \n audio max value : {np.max(audio)}")
|
32 |
else:
|
33 |
raise Exception("please provide audio")
|
34 |
|
35 |
if isinstance(audio , np.ndarray):
|
36 |
+
return self.__generate_text(inputs=audio, task=TASK)
|
37 |
else:
|
38 |
raise Exception("Audio is not np array")
|
39 |
|