# socm/models.py — commit d018247 ("use streamlit secrets for hf key", spencer)
import json
from PIL import Image
import requests
import streamlit as st
from transformers import CLIPProcessor, CLIPModel
from embeddings import logger
# Hugging Face Inference API token, supplied through Streamlit secrets
# (the secret is stored under the literal key "hf_api.key"). Raises a
# KeyError at import time if the secret is not configured, which surfaces
# the misconfiguration immediately instead of failing on the first request.
HF_TOKEN = st.secrets["hf_api.key"]
class HuggingFaceHosted:
    """Thin client for a model served by the Hugging Face hosted Inference API.

    Each helper method builds the task-specific JSON payload and POSTs it to
    ``https://api-inference.huggingface.co/models/<model_id>``.

    Args:
        model_id: Hub model identifier, e.g. ``"EleutherAI/gpt-j-6B"``.
        api_token: Hugging Face API token used as a Bearer credential.
        verbose: When True, log outgoing text-generation payloads.
    """

    def __init__(self, model_id, api_token, verbose=False):
        self.model_id = model_id
        self.api_token = api_token
        self.verbose = verbose

    def query(self, data):
        """POST a pre-serialized JSON payload and return the decoded response.

        Args:
            data: JSON string (already ``json.dumps``-ed by the caller).

        Returns:
            The API response parsed from JSON (dict or list, per task).
        """
        headers = {"Authorization": f"Bearer {self.api_token}"}
        api_url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        # timeout keeps a stalled inference call from hanging the app forever
        response = requests.post(api_url, headers=headers, data=data, timeout=60)
        return json.loads(response.content.decode("utf-8"))

    def fill_mask(self, text):
        """Run the fill-mask task on *text* (must contain the model's mask token)."""
        data = json.dumps({"inputs": text})
        return self.query(data)

    def text_generation(self, text, **parameters):
        """Generate a continuation of *text*; extra kwargs are passed as API parameters."""
        payload = {
            "inputs": text,
            "parameters": parameters,
        }
        if self.verbose:
            logger.info(payload)
        data = json.dumps(payload)
        return self.query(data)

    def summarization(self, text, do_sample=False):
        """Summarize *text*; *do_sample* toggles sampling vs. greedy decoding."""
        data = json.dumps({"inputs": text, "parameters": {"do_sample": do_sample}})
        return self.query(data)

    def question_answering(self, question, context):
        """Answer *question* using the supplied *context* passage."""
        data = json.dumps(
            {
                "inputs": {
                    "question": question,
                    "context": context,
                }
            }
        )
        return self.query(data)
class CLIP:
    """Locally-loaded CLIP model producing image and text embeddings.

    Args:
        model_id: Hub identifier of the CLIP checkpoint to load.
    """

    def __init__(self, model_id="openai/clip-vit-large-patch14"):
        self.model_id = model_id
        self.model = CLIPModel.from_pretrained(model_id)
        self.processor = CLIPProcessor.from_pretrained(model_id)

    def get_image_emb(self, image):
        """Return the CLIP embedding of *image* as a numpy array.

        Args:
            image: A PIL image, or a filesystem path (str) to open.
        """
        if isinstance(image, str):
            image = Image.open(image)
        image_inputs = self.processor(images=image, return_tensors="pt", padding=True)
        # no_grad: inference only — skip building an autograd graph
        with torch.no_grad():
            out = self.model.get_image_features(**image_inputs)
        return out.numpy()

    def get_text_emb(self, text):
        """Return the CLIP embedding of *text* (str or list of str) as a numpy array."""
        text_inputs = self.processor(text=text, return_tensors="pt", padding=True)
        with torch.no_grad():
            out = self.model.get_text_features(**text_inputs)
        return out.numpy()

    def __repr__(self):
        return f"CLIP Local <{self.model_id}>"
class GPTJ(HuggingFaceHosted):
    """EleutherAI GPT-J 6B, served via the hosted Inference API."""

    def __init__(self, model_id="EleutherAI/gpt-j-6B", api_token=HF_TOKEN, verbose=False):
        super().__init__(model_id, api_token, verbose=verbose)

    def __repr__(self):
        return f"GPTJ Hosted <{self.model_id}>"

    def __call__(self, text, **parameters):
        # Calling the instance is shorthand for text generation.
        return self.text_generation(text, **parameters)
class MaskEncoder(HuggingFaceHosted):
    """Masked-language model (RoBERTa large) on the hosted Inference API."""

    def __init__(self, model_id="roberta-large", api_token=HF_TOKEN, verbose=False):
        super().__init__(model_id, api_token, verbose=verbose)

    def __repr__(self):
        return f"MaskEncoder Hosted <{self.model_id}>"

    def __call__(self, text):
        # Calling the instance is shorthand for the fill-mask task.
        return self.fill_mask(text)
class T2T(HuggingFaceHosted):
    """Text-to-text model (BigScience T0pp) on the hosted Inference API."""

    def __init__(self, model_id="bigscience/T0pp", api_token=HF_TOKEN, verbose=False):
        super().__init__(model_id, api_token, verbose=verbose)

    def __repr__(self):
        return f"T2T Hosted <{self.model_id}>"

    def __call__(self, text, **parameters):
        # Calling the instance is shorthand for text generation.
        return self.text_generation(text, **parameters)