piercemaloney
/

llemma_7b

Text Generation

text-generation-inference

Inference Endpoints

Model card Files and versions Community

Pierce Maloney committed on Apr 14

Commit

8cda5e7

•

1 Parent(s): b179918

taking out truncation of input_ids

Files changed (1) hide show

handler.py +4 -4

handler.py CHANGED Viewed

@@ -31,11 +31,11 @@ class EndpointHandler():
         input_ids = self.tokenizer.encode(inputs, return_tensors="pt").to('cuda')
         max_generation_length = 75  # Desired number of tokens to generate
-        max_input_length = 4092 - max_generation_length  # Maximum input length to allow space for generation
-        # Truncate input_ids to the most recent tokens that fit within the max_input_length
-        if input_ids.shape[1] > max_input_length:
-            input_ids = input_ids[:, -max_input_length:]
         max_length = input_ids.shape[1] + max_generation_length

         input_ids = self.tokenizer.encode(inputs, return_tensors="pt").to('cuda')
         max_generation_length = 75  # Desired number of tokens to generate
+        # max_input_length = 4092 - max_generation_length  # Maximum input length to allow space for generation
+        # # Truncate input_ids to the most recent tokens that fit within the max_input_length
+        # if input_ids.shape[1] > max_input_length:
+        #     input_ids = input_ids[:, -max_input_length:]
         max_length = input_ids.shape[1] + max_generation_length