import numpy as np

from .init import pipe

TASK = "transcribe"
BATCH_SIZE = 8


class A2T:
    """Audio-to-text wrapper around the shared ASR pipeline."""

    def __init__(self, mic):
        self.mic = mic

    def __transcribe(self, inputs, task: str = None):
        if inputs is None:
            raise ValueError("Inputs is None -- nothing to transcribe")
        transcribed_text = pipe(
            inputs,
            batch_size=BATCH_SIZE,
            generate_kwargs={"task": task, "language": "english"},
        )
        print("transcribed_text:", transcribed_text)
        return transcribed_text["text"]

    def __preprocess(self, raw: np.ndarray):
        # Convert 16-bit PCM samples to float32 in the [-1.0, 1.0] range
        # expected by the ASR pipeline.
        chunk = raw.astype(np.float32) / 32768.0
        return chunk

    def predict(self):
        try:
            if self.mic is None:
                raise ValueError("Please provide audio (mic is None)")

            # Pull the raw samples from the recording and normalise them.
            chunk = self.mic.get_array_of_samples()
            chunk = np.array(chunk, dtype=np.int16)
            audio = self.__preprocess(chunk)
            print(f"audio: {audio}\nshape: {audio.shape}\nmax: {np.max(audio)}")

            if not isinstance(audio, np.ndarray):
                raise TypeError("Audio is not a numpy array")

            # The pipeline input is built for 16 kHz mono audio.
            inputs = {"sampling_rate": 16000, "raw": audio}
            return self.__transcribe(inputs=inputs, task=TASK)
        except Exception as e:
            return f"Transcription error: {e}"