import numpy as np

from .init import pipe

TASK = "transcribe"
BATCH_SIZE = 8


class A2T:
    """Audio-to-text wrapper around the shared ASR pipeline."""

    def __init__(self, mic):
        self.mic = mic

    def __transcribe(self, inputs, task: str = None):
        if inputs is None:
            raise ValueError("Inputs is None -- nothing to transcribe")
        transcribed_text = pipe(
            inputs,
            batch_size=BATCH_SIZE,
            generate_kwargs={"task": task, "language": "english"},
        )
        print("transcribed_text:", transcribed_text)
        return transcribed_text["text"]

    def __preprocess(self, raw: np.ndarray):
        # Convert 16-bit PCM samples to float32 in the [-1.0, 1.0] range
        # expected by the ASR pipeline.
        chunk = raw.astype(np.float32) / 32768.0
        return chunk

    def predict(self):
        try:
            if self.mic is None:
                raise ValueError("Please provide audio (mic is None)")

            # Pull the raw samples from the recording and normalise them.
            chunk = self.mic.get_array_of_samples()
            chunk = np.array(chunk, dtype=np.int16)
            audio = self.__preprocess(chunk)
            print(f"audio: {audio}\nshape: {audio.shape}\nmax: {np.max(audio)}")

            if not isinstance(audio, np.ndarray):
                raise TypeError("Audio is not a numpy array")

            # The pipeline input is built for 16 kHz mono audio.
            inputs = {"sampling_rate": 16000, "raw": audio}
            return self.__transcribe(inputs=inputs, task=TASK)
        except Exception as e:
            return f"Transcription error: {e}"