This model uses the hfl/chinese-roberta-wwm-ext-large backbone and was trained on the Chinese versions of the SNLI, MNLI, DNLI, KvPI, OCNLI, and CMNLI datasets.
The model structure is as follows:
```python
import torch
import torch.nn as nn
from transformers import AutoModel

pretrain_model_dir = 'hfl/chinese-roberta-wwm-ext-large'
bert_hidden_dim = 1024  # hidden size of chinese-roberta-wwm-ext-large


class RobertaForSequenceClassification(nn.Module):
    def __init__(self, tagset_size):
        super(RobertaForSequenceClassification, self).__init__()
        self.tagset_size = tagset_size
        self.roberta_single = AutoModel.from_pretrained(pretrain_model_dir)
        self.single_hidden2tag = RobertaClassificationHead(bert_hidden_dim, tagset_size)

    def forward(self, input_ids, input_mask):
        outputs_single = self.roberta_single(input_ids, attention_mask=input_mask)
        hidden_states_single = outputs_single[1]  # pooler output, (batch, hidden)
        score_single = self.single_hidden2tag(hidden_states_single)  # (batch, tagset_size)
        return score_single


class RobertaClassificationHead(nn.Module):
    def __init__(self, bert_hidden_dim, num_labels):
        super(RobertaClassificationHead, self).__init__()
        self.dense = nn.Linear(bert_hidden_dim, bert_hidden_dim)
        self.dropout = nn.Dropout(0.1)
        self.out_proj = nn.Linear(bert_hidden_dim, num_labels)

    def forward(self, features):
        x = features  # pooled <s> token (equiv. to [CLS]) representation
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x


num_labels = 3  # assumption: e.g. entailment / neutral / contradiction; set to your label count
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = RobertaForSequenceClassification(num_labels)
# args.model_save_path is the directory containing the fine-tuned checkpoint
model.load_state_dict(torch.load(args.model_save_path + 'Roberta_large_model.pt',
                                 map_location=device))
```
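For reference, below is a minimal inference sketch. It assumes the tokenizer that ships with the backbone and reuses `pretrain_model_dir`, `model`, and `device` from above; since the id-to-label mapping of the fine-tuned head is not documented in this card, the example only prints the predicted class index.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(pretrain_model_dir)

# Example premise / hypothesis pair (illustrative Chinese NLI input).
premise = "今天天气很好。"
hypothesis = "今天是晴天。"
inputs = tokenizer(premise, hypothesis, return_tensors="pt",
                   truncation=True, max_length=512)

model.to(device)
model.eval()
with torch.no_grad():
    logits = model(inputs["input_ids"].to(device),
                   inputs["attention_mask"].to(device))

# Report the predicted class index; map it to a label name
# according to the label order used during fine-tuning.
pred = logits.argmax(dim=-1).item()
print(pred)
```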