wisper-base-jwt / handler.py
michaellee8's picture
fix: b64 decode
0a25040
from typing import Dict
from transformers import pipeline
import requests
import jwt
import base64
SAMPLE_RATE = 16000
PUBLIC_KEY = b"-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAu1SU1LfVLPHCozMxH2Mo\n4lgOEePzNm0tRgeLezV6ffAt0gunVTLw7onLRnrq0/IzW7yWR7QkrmBL7jTKEn5u\n+qKhbwKfBstIs+bMY2Zkp18gnTxKLxoS2tFczGkPLPgizskuemMghRniWaoLcyeh\nkd3qqGElvW/VDL5AaWTg0nLVkjRo9z+40RQzuVaE8AkAFmxZzow3x+VJYKdjykkJ\n0iT9wCS0DRTXu269V264Vf/3jvredZiKRkgwlL9xNAwxXFg0x/XFw005UWVRIkdg\ncKWTjpBP2dPwVZ4WWC+9aGVd+Gyn1o0CLelf4rEjGoXbAAEgAqeGUxrcIlbjXfbc\nmwIDAQAB\n-----END PUBLIC KEY-----"
class EndpointHandler:
def __init__(self, path=""):
self.pipeline = pipeline(
"automatic-speech-recognition", model="openai/whisper-base"
)
def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
"""
Args:
data (:obj:):
includes the deserialized audio file as bytes
Return:
A :obj:`dict`:. base64 encoded image
"""
# process input
token = data.pop("token", None)
if token is None:
raise RuntimeError("missing token")
decoded = jwt.decode(token, PUBLIC_KEY, algorithms=["RS512"])
print("received input from jti=", decoded["jti"])
inputs = data.pop("inputs", None)
if isinstance(inputs, str):
inputs = base64.b64decode(inputs)
parameters = data.pop("parameters", {})
return self.pipeline(inputs, **parameters)