File size: 2,037 Bytes
4ee5676 3edbfec e18fb9d a677076 54f1c88 b12e8e8 4ee5676 13a15d8 2ba318d a677076 5b8ef5f a677076 615c558 e18fb9d 615c558 4ee5676 847a572 4ee5676 13a15d8 4ee5676 cbd5d1a 4ee5676 13a15d8 cbd5d1a 13a15d8 cbd5d1a 4ee5676 847a572 a677076 95bbb32 13a15d8 4ee5676 cbd5d1a 95bbb32 ab121f4 6d73c34 f42d0da 6d73c34 ab121f4 95bbb32 a677076 ab121f4 2ba318d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import logging

import librosa
import numpy as np

from .init import pipe
# Value sent as the "task" entry of the pipeline's generate_kwargs.
TASK = "transcribe"
# batch_size argument passed to the pipeline call.
BATCH_SIZE = 8
# Multiplied by SAMPLING_RATE to cap how many samples are kept after
# preprocessing (presumably a duration in seconds -- confirm).
LIMIT = 60
# Sample rate (Hz) that audio is resampled to before transcription.
SAMPLING_RATE = 16000
class A2T:
    """Audio-to-text helper: preprocess a recorded clip and run ``pipe`` on it.

    ``mic`` must expose ``get_array_of_samples()`` and ``frame_rate``, and may
    expose ``channels``.  NOTE(review): this matches pydub's ``AudioSegment``;
    confirm against the caller.  ``mic`` may be ``None``, in which case
    :meth:`predict` returns an error message instead of a transcript.
    """

    _log = logging.getLogger(__name__)

    def __init__(self, mic):
        self.mic = mic

    def __transcribe(self, inputs, task: str = None, lang: str = "english"):
        """Run the pipeline on ``inputs`` and return the ``"text"`` of its result.

        Args:
            inputs: Audio samples handed to ``pipe`` unchanged.
            task: Forwarded as ``generate_kwargs["task"]``.
            lang: Forwarded as ``generate_kwargs["language"]``.

        Raises:
            ValueError: If ``inputs`` is ``None``.
        """
        if inputs is None:
            # Previously this was only printed and the pipeline was still
            # invoked with None; fail fast with the same message instead.
            raise ValueError("Inputs None")
        result = pipe(
            inputs,
            batch_size=BATCH_SIZE,
            generate_kwargs={"task": task, "language": lang},
            return_timestamps=True,
        )
        return result["text"]

    def __preprocces(self, raw: np.ndarray, sampling_rate: int):
        """Convert integer PCM samples into the float signal fed to the pipeline.

        Steps: scale to float32 by 1/32768, down-mix to mono when ``raw`` has
        more than one dimension (expected layout ``(frames, channels)``),
        resample to ``SAMPLING_RATE`` if needed, then keep at most
        ``SAMPLING_RATE * LIMIT`` samples.

        Args:
            raw: Sample array, 1-D mono or 2-D ``(frames, channels)``.
            sampling_rate: Sample rate of ``raw`` in Hz.

        Returns:
            1-D float32 array at ``SAMPLING_RATE``.
        """
        # 32768 == 2**15, i.e. this assumes 16-bit signed samples.
        chunk = raw.astype(np.float32, order="C") / 32768.0
        if chunk.ndim > 1:
            # librosa.to_mono expects (channels, frames), hence the transpose.
            chunk = librosa.to_mono(chunk.T)
        if sampling_rate != SAMPLING_RATE:
            chunk = librosa.resample(
                chunk, orig_sr=sampling_rate, target_sr=SAMPLING_RATE
            )
        chunk = chunk[: SAMPLING_RATE * LIMIT]
        self._log.debug(
            "Preprocessed audio: %d samples (source rate %s Hz)",
            chunk.shape[0],
            sampling_rate,
        )
        return chunk

    def predict(self):
        """Transcribe the audio held in ``self.mic``.

        Returns:
            The transcript on success.  On any failure (no audio, empty audio,
            pipeline error) the exception is logged and a string of the form
            ``"Oops some kinda error : <message>"`` is returned instead of
            raising, so callers always receive text.
        """
        try:
            if self.mic is None:
                raise ValueError("please provide audio")

            # NOTE(review): samples are forced to int16; sources with a wider
            # sample width would need different scaling -- confirm inputs.
            samples = np.array(self.mic.get_array_of_samples(), dtype=np.int16)
            if samples.size == 0:
                raise ValueError("audio is empty")
            sampling_rate = self.mic.frame_rate

            # Multi-channel sources return interleaved samples in one flat
            # array; restore the (frames, channels) layout so the mono
            # down-mix in __preprocces actually runs.  Objects without a
            # ``channels`` attribute are treated as mono, as before.
            channels = getattr(self.mic, "channels", 1)
            if (
                isinstance(channels, int)
                and channels > 1
                and samples.size % channels == 0
            ):
                samples = samples.reshape(-1, channels)

            audio = self.__preprocces(raw=samples, sampling_rate=sampling_rate)
            if not isinstance(audio, np.ndarray):
                raise TypeError("Audio is not np array")
            return self.__transcribe(inputs=audio, task=TASK)
        except Exception as e:
            # Deliberate best-effort boundary: report the problem as text, but
            # keep the traceback in the log instead of discarding it.
            self._log.exception("Audio transcription failed")
            return f"Oops some kinda error : {e}"
|