import time
import uuid

from transformers import AutoModelForCausalLM, AutoTokenizer


class HFModel:
    """Wrap a Hugging Face causal LM behind an OpenAI-style completion call."""

    def __init__(self, model_name):
        """Load the tokenizer and model weights for *model_name*."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)

    def __call__(self, request):
        """Generate a completion for ``request['prompt']``.

        Args:
            request: mapping with at least a ``'prompt'`` key (string).

        Returns:
            ``(completion, choices)`` where ``completion`` is an
            OpenAI-style ``text_completion`` dict and ``choices`` is the
            list of decoded output strings.
        """
        prompt = request.get("prompt")
        input_ids = self.tokenizer.encode(prompt, return_tensors='pt')
        output_ids = self.model.generate(input_ids, max_length=50, do_sample=True)
        # BUG FIX: `generate` returns a 2-D batch tensor; `decode` expects a
        # single sequence (and would have yielded one string, which the usage
        # math below then iterated character-by-character). `batch_decode`
        # correctly returns one decoded string per generated sequence.
        choices = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)

        # Whitespace-split word counts approximate token counts; computed once
        # here instead of twice inside the dict literal as before.
        completion_tokens = sum(len(c.split()) for c in choices)
        prompt_tokens = len(prompt.split())

        completion = {
            # Was `None  # fill in` — OpenAI-style unique completion id.
            'id': f"cmpl-{uuid.uuid4().hex}",
            'model': 'codegen',
            'object': 'text_completion',
            'created': int(time.time()),
            # Was `None  # fill in` — OpenAI-style choice records.
            'choices': [
                {'text': c, 'index': i, 'logprobs': None, 'finish_reason': 'length'}
                for i, c in enumerate(choices)
            ],
            'usage': {
                'completion_tokens': int(completion_tokens),
                'prompt_tokens': int(prompt_tokens),
                'total_tokens': int(completion_tokens + prompt_tokens),
            },
        }
        return completion, choices