piercemaloney
/

llemma_7b

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Pierce Maloney committed on Apr 10

Commit

833b301

•

1 Parent(s): 88fdb99

adding new bad word

Files changed (2) hide show

handler.py +9 -15
test_tokenizer +0 -0

handler.py CHANGED Viewed

@@ -7,9 +7,9 @@ class EndpointHandler():
     def __init__(self, path=""):
         # Preload all the elements you are going to need at inference.
         tokenizer = AutoTokenizer.from_pretrained(path)
-        self.tokenizer = tokenizer
-        self.model = AutoModelForCausalLM.from_pretrained(path)
-        self.tokenizer.pad_token = tokenizer.eos_token
         self.stopping_criteria = StoppingCriteriaList([StopAtPeriodCriteria(tokenizer)])
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -21,24 +21,18 @@ class EndpointHandler():
             A :obj:`list` | `dict`: will be serialized and returned
         """
         inputs = data.pop("inputs", data)
-        input_ids = self.tokenizer.encode(inputs, return_tensors="pt")
         # Bad word: id 3070 corresponds to "(*", and we do not want to output a comment
-        prediction_ids = self.model.generate(
-            input_ids,
-            max_length=input_ids.shape[1] + 50,
             stopping_criteria=self.stopping_criteria,
-            bad_words_ids=[[3070], [313, 334]],
             temperature=1,
             top_k=40,
-            # pad_token_id=self.tokenizer.eos_token_id,
-            # return_dict_in_generate=True,  # To get more detailed output (optional)
         )
-        # Decode the generated ids to text
-        # Exclude the input_ids length to get only the new tokens
-        prediction_text = self.tokenizer.decode(prediction_ids[0, input_ids.shape[1]:], skip_special_tokens=True)
-        return [{"generated_text": prediction_text, "ids": prediction_ids[0, input_ids.shape[1]:].tolist()}]
 class StopAtPeriodCriteria(StoppingCriteria):

     def __init__(self, path=""):
         # Preload all the elements you are going to need at inference.
         tokenizer = AutoTokenizer.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(path)
+        tokenizer.pad_token = tokenizer.eos_token
+        self.pipeline = pipeline('text-generation', model=model, tokenizer=tokenizer)
         self.stopping_criteria = StoppingCriteriaList([StopAtPeriodCriteria(tokenizer)])
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
             A :obj:`list` | `dict`: will be serialized and returned
         """
         inputs = data.pop("inputs", data)
         # Bad word: id 3070 corresponds to "(*", and we do not want to output a comment
+        prediction = self.pipeline(
+            inputs,
             stopping_criteria=self.stopping_criteria,
+            max_new_tokens=50,
+            return_full_text=False,
+            bad_words_ids=[[3070], [313, 334], [10456]],
             temperature=1,
             top_k=40,
         )
+        return prediction
 class StopAtPeriodCriteria(StoppingCriteria):

test_tokenizer ADDED Viewed

File without changes