Update modeling_molmo.py
Browse files
Files changed: modeling_molmo.py (+3 -3)
modeling_molmo.py
CHANGED
@@ -2509,7 +2509,6 @@ class MOLMoForCausalLM(PreTrainedModel):
|
|
2509 |
outputs: ModelOutput,
|
2510 |
model_kwargs: Dict[str, Any],
|
2511 |
is_encoder_decoder: bool = False,
|
2512 |
- standardize_cache_format: bool = False,
|
2513 |
num_new_tokens: int = 1,
|
2514 |
) -> Dict[str, Any]:
|
2515 |
if self.config.use_position_ids:
|
@@ -2520,8 +2519,9 @@ class MOLMoForCausalLM(PreTrainedModel):
|
|
2520 |
del model_kwargs["images"]
|
2521 |
del model_kwargs["image_masks"]
|
2522 |
del model_kwargs["image_input_idx"]
|
2523 |
-
|
2524 |
-
|
|
|
2525 |
return model_kwargs
|
2526 |
|
2527 |
# TODO: these are required to make the implementation complete.
|
|
|
2509 |
outputs: ModelOutput,
|
2510 |
model_kwargs: Dict[str, Any],
|
2511 |
is_encoder_decoder: bool = False,
|
|
|
2512 |
num_new_tokens: int = 1,
|
2513 |
) -> Dict[str, Any]:
|
2514 |
if self.config.use_position_ids:
|
|
|
2519 |
del model_kwargs["images"]
|
2520 |
del model_kwargs["image_masks"]
|
2521 |
del model_kwargs["image_input_idx"]
|
2522 |
+ cache_name, cache = super()._extract_past_from_model_output(outputs)
|
2523 |
+ model_kwargs[cache_name] = cache
|
2524 |
+ model_kwargs["cache_position"] = model_kwargs["cache_position"][-1:] + num_new_tokens
|
2525 |
return model_kwargs
|
2526 |
|
2527 |
# TODO: these are required to make the implementation complete.
|