databricks
/

dolly-v2-3b

@@ -22,10 +22,8 @@ INTRO_BLURB = (
 # This is the prompt that is used for generating responses using an already trained model.  It ends with the response
 # key, where the job of the model is to provide the completion that follows it (i.e. the response itself).
 PROMPT_FOR_GENERATION_FORMAT = """{intro}
 {instruction_key}
 {instruction}
 {response_key}
 """.format(
     intro=INTRO_BLURB,
@@ -37,17 +35,13 @@ PROMPT_FOR_GENERATION_FORMAT = """{intro}
 def get_special_token_id(tokenizer: PreTrainedTokenizer, key: str) -> int:
     """Gets the token ID for a given string that has been added to the tokenizer as a special token.
     When training, we configure the tokenizer so that sequences like "### Instruction:" and "### End" are
     treated specially and converted to a single, new token.  This retrieves the token ID that each of these keys maps to.
     Args:
         tokenizer (PreTrainedTokenizer): the tokenizer
         key (str): the key to convert to a single token
     Raises:
         RuntimeError: if more than one ID was generated
     Returns:
         int: the token ID for the given key
     """
@@ -62,7 +56,6 @@ class InstructionTextGenerationPipeline(Pipeline):
         self, *args, do_sample: bool = True, max_new_tokens: int = 256, top_p: float = 0.92, top_k: int = 0, **kwargs
     ):
         """Initialize the pipeline
         Args:
             do_sample (bool, optional): Whether or not to use sampling. Defaults to True.
             max_new_tokens (int, optional): Max new tokens after the prompt to generate. Defaults to 256.
@@ -132,7 +125,6 @@ class InstructionTextGenerationPipeline(Pipeline):
         generated_sequence = self.model.generate(
             input_ids=input_ids.to(self.model.device),
             attention_mask=attention_mask.to(self.model.device) if attention_mask is not None else None,
-            pad_token_id=self.tokenizer.pad_token_id,
             **generate_kwargs,
         )
@@ -209,4 +201,4 @@ class InstructionTextGenerationPipeline(Pipeline):
             records.append(rec)
-        return records

 # This is the prompt that is used for generating responses using an already trained model.  It ends with the response
 # key, where the job of the model is to provide the completion that follows it (i.e. the response itself).
 PROMPT_FOR_GENERATION_FORMAT = """{intro}
 {instruction_key}
 {instruction}
 {response_key}
 """.format(
     intro=INTRO_BLURB,
 def get_special_token_id(tokenizer: PreTrainedTokenizer, key: str) -> int:
     """Gets the token ID for a given string that has been added to the tokenizer as a special token.
     When training, we configure the tokenizer so that sequences like "### Instruction:" and "### End" are
     treated specially and converted to a single, new token.  This retrieves the token ID that each of these keys maps to.
     Args:
         tokenizer (PreTrainedTokenizer): the tokenizer
         key (str): the key to convert to a single token
     Raises:
         RuntimeError: if more than one ID was generated
     Returns:
         int: the token ID for the given key
     """
         self, *args, do_sample: bool = True, max_new_tokens: int = 256, top_p: float = 0.92, top_k: int = 0, **kwargs
     ):
         """Initialize the pipeline
         Args:
             do_sample (bool, optional): Whether or not to use sampling. Defaults to True.
             max_new_tokens (int, optional): Max new tokens after the prompt to generate. Defaults to 256.
         generated_sequence = self.model.generate(
             input_ids=input_ids.to(self.model.device),
             attention_mask=attention_mask.to(self.model.device) if attention_mask is not None else None,
             **generate_kwargs,
         )
             records.append(rec)
+        return records