KeyError: 'RewardModel'

#1
by ragesh-beo - opened

I am trying to load the model locally and I am running into the following error:
ValueError: The checkpoint you are trying to load has model type RewardModel but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.

My Transformers version is 4.46.3.
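
For context, the loading call I am running is roughly the following (the repo id here is just a placeholder for the actual checkpoint):

from transformers import AutoModel

# Placeholder repo id -- replace with the actual reward model checkpoint
model = AutoModel.from_pretrained("<org>/<reward-model-checkpoint>")
# -> ValueError: The checkpoint you are trying to load has model type `RewardModel` ...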

Yale NLP Lab org • edited 5 days ago

Hi, thanks for the inquiry! Does it work if you include the following code before you load the model?

from transformers import AutoTokenizer, AutoModel, AutoConfig, LlamaConfig, PreTrainedModel, LlamaForSequenceClassification
import torch.nn as nn
import torch

# Login to HF to access LLAMA model
from huggingface_hub import login
login("") # HF token

class RewardModelConfig(LlamaConfig):
    model_type = "RewardModel"

    def __init__(self, reward_dim=None, base_model_name=None, **kwargs):
        super().__init__(**kwargs)
        
        self.reward_dim = reward_dim
        self.base_model_name = base_model_name

class RewardModel(PreTrainedModel):
    config_class = RewardModelConfig

    def create_base_model(self):
        
        # use sequence classification model for consistency with https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1 
        BACKBONE_MODEL =  LlamaForSequenceClassification.from_pretrained( 
            self.config.base_model_name,
            config=LlamaConfig.from_pretrained(self.config.base_model_name),
        )
        BACKBONE_MODEL.config.pad_token_id = BACKBONE_MODEL.config.eos_token_id
        BACKBONE_MODEL.config.output_hidden_states = True

        for param in BACKBONE_MODEL.parameters():
            param.requires_grad = False

        return BACKBONE_MODEL

    def __init__(self, config):
        super().__init__(config)
        
        # use .base_model to remove lm_head
        self.BASE_MODEL = self.create_base_model().base_model 

        # regression head for reward prediction
        self.regression_head = nn.Linear(self.BASE_MODEL.config.hidden_size, config.reward_dim)
        
    def forward(self, input_ids, attention_mask=None, rewards=None, **kwargs):

        # forward pass through the base model
        outputs = self.BASE_MODEL(input_ids, attention_mask=attention_mask, **kwargs)
        
        hidden_states = outputs.hidden_states[-1]

        # access hidden state corresponding to the last token in each sequence across the batch
        last_token_hidden_state = hidden_states[:, -1, :] 
        reward_predictions = self.regression_head(last_token_hidden_state)

        return reward_predictions

    def prepare_inputs_for_generation(self, *args, **kwargs):
        return self.BASE_MODEL.prepare_inputs_for_generation(*args, **kwargs)

# Register the custom classes so AutoConfig/AutoModel recognize model_type "RewardModel"
AutoConfig.register("RewardModel", RewardModelConfig)
AutoModel.register(RewardModelConfig, RewardModel)
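
With these classes registered, you should then be able to load the checkpoint through the Auto classes. A minimal sketch that continues the script above (the repo id below is a placeholder; substitute the actual model repository):

repo_id = "<org>/<reward-model-checkpoint>"  # placeholder -- replace with the actual repo id

tokenizer = AutoTokenizer.from_pretrained(repo_id)

# AutoModel now resolves model_type "RewardModel" to the RewardModel class registered above
model = AutoModel.from_pretrained(repo_id)
model.eval()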

Seems to be working now, @pybeebee.

ragesh-beo changed discussion status to closed
