```python
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer


class GPTRewardModel(nn.Module):
    def __init__(self):
        super().__init__()
        model = AutoModelForCausalLM.from_pretrained("pvduy/vicuna-13b-v1.1")
        self.config = model.config
        # LLaMA-style configs call the hidden width `hidden_size`; GPT-2-style
        # configs call it `n_embd`. Normalize on `n_embd` for the value head.
        self.config.n_embd = (
            self.config.hidden_size
            if hasattr(self.config, "hidden_size")
            else self.config.n_embd
        )
        # Keep only the decoder stack; the LM head is not needed for scoring.
        self.transformer = model.model
        # Scalar value head mapping each hidden state to a reward.
        self.v_head = nn.Linear(self.config.n_embd, 1, bias=False)
        self.tokenizer = AutoTokenizer.from_pretrained("pvduy/vicuna-13b-v1.1")
        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
        self.tokenizer.padding_side = "right"
        self.PAD_ID = self.tokenizer.pad_token_id

    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        mc_token_ids=None,
        labels=None,
        return_dict=False,
        output_attentions=False,
        output_hidden_states=False,
    ):
        # Only input_ids and attention_mask are used; the remaining arguments
        # are accepted for interface compatibility and ignored.
        transformer_outputs = self.transformer(
            input_ids,
            attention_mask=attention_mask,
        )
        hidden_states = transformer_outputs[0]
        # One reward per token: (batch, seq_len).
        rewards = self.v_head(hidden_states).squeeze(-1)
        # Read out the reward at the first PAD token (== EOS, since
        # pad_token_id is set to eos_token_id), i.e. at the end of each
        # response. argmax over an all-zero row would silently return 0, so
        # sequences with no padding fall back to the last position instead.
        pad_mask = input_ids == self.PAD_ID
        last = torch.full(
            (input_ids.size(0),), input_ids.size(1) - 1, device=input_ids.device
        )
        ends = torch.where(pad_mask.any(dim=1), pad_mask.float().argmax(dim=1), last)
        rewards = torch.gather(rewards, 1, ends.view(-1, 1))
        return rewards
```
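
A minimal usage sketch follows: it instantiates the class above, tokenizes a batch of two illustrative transcripts, and reads out one scalar reward per sequence. The `### Human:`/`### Assistant:` prompt template, the example strings, and the `max_length` value are assumptions for illustration; match them to the format the reward model was trained on. Loading the 13B weights this way also assumes enough memory on the target device.

```python
# Usage sketch (assumes the GPTRewardModel class above is in scope).
# The prompt template and max_length below are illustrative assumptions.
reward_model = GPTRewardModel()
reward_model.eval()

texts = [
    "### Human: Name the capital of France.### Assistant: The capital of France is Paris.",
    "### Human: Name the capital of France.### Assistant: No idea.",
]
enc = reward_model.tokenizer(
    texts, padding=True, truncation=True, max_length=512, return_tensors="pt"
)
with torch.no_grad():
    scores = reward_model(
        input_ids=enc["input_ids"], attention_mask=enc["attention_mask"]
    )
print(scores.shape)  # (2, 1): one scalar reward per sequence
```

In a pairwise preference setup, the score of the preferred transcript would be compared against the score of the rejected one; at inference time the scalar is used directly as the RL reward signal.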