from torch import nn
from transformers import PretrainedConfig, PreTrainedModel, BertModel

from .configuration_bert import SimBertConfig


class SimBertModel(PreTrainedModel):
    """BERT sentence-pair similarity model.

    The pair is encoded as a single sequence and the softmax probability of
    class 1 is regressed against a float similarity label with an MSE loss.
    """

    config_class = SimBertConfig

    def __init__(
        self,
        config: PretrainedConfig
    ) -> None:
        super().__init__(config)
        # BERT encoder; the pooler output (tanh-projected [CLS] state) is
        # used as the pair representation.
        self.bert = BertModel(config=config, add_pooling_layer=True)
        # Binary classification head over the pooled representation.
        self.fc = nn.Linear(config.hidden_size, 2)

        self.loss_fct = nn.MSELoss()
        self.softmax = nn.Softmax(dim=1)

    def forward(
        self,
        input_ids,
        token_type_ids,
        attention_mask,
        labels=None
    ):
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids
        )
        pooled_output = outputs.pooler_output
        logits = self.fc(pooled_output)
        # Keep the softmax probability of class 1 as the similarity score.
        logits = self.softmax(logits)[:, 1]
        if labels is not None:
            # Labels are expected to be floats, matching the MSE objective.
            loss = self.loss_fct(logits.view(-1), labels.view(-1))
            return loss, logits
        return None, logits


class CosSimBertModel(PreTrainedModel):
    """Siamese BERT similarity model.

    The two sentences arrive concatenated along the sequence dimension, are
    encoded separately by a shared BERT encoder, and are scored by the cosine
    similarity of their pooled outputs.
    """

    config_class = SimBertConfig

    def __init__(
        self,
        config: PretrainedConfig
    ) -> None:
        super().__init__(config)
        # Shared (siamese) BERT encoder applied to both halves of the input.
        self.bert = BertModel(config=config, add_pooling_layer=True)
        # The cosine similarity is regressed against a float label with MSE.
        self.loss_fct = nn.MSELoss()

    def forward(
        self,
        input_ids,
        token_type_ids,
        attention_mask,
        labels=None
    ):
        # The two sentences are packed side by side along the sequence
        # dimension; split every tensor back into the "a" and "b" halves.
        seq_length = input_ids.size(-1)
        a = {
            "input_ids": input_ids[:, :seq_length // 2],
            "token_type_ids": token_type_ids[:, :seq_length // 2],
            "attention_mask": attention_mask[:, :seq_length // 2]
        }
        b = {
            "input_ids": input_ids[:, seq_length // 2:],
            "token_type_ids": token_type_ids[:, seq_length // 2:],
            "attention_mask": attention_mask[:, seq_length // 2:]
        }
        outputs_a = self.bert(**a)
        outputs_b = self.bert(**b)
        pooled_a_output = outputs_a.pooler_output
        pooled_b_output = outputs_b.pooler_output
        # Cosine similarity between the two pooled sentence embeddings.
        logits = nn.functional.cosine_similarity(pooled_a_output, pooled_b_output)
        if labels is not None:
            loss = self.loss_fct(logits.view(-1), labels.view(-1))
            return loss, logits
        return None, logits

    def encode(
        self,
        input_ids,
        token_type_ids,
        attention_mask,
    ):
        # Embed a single (already split) sentence, e.g. for building an
        # embedding index at inference time.
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids
        )
        pooled_output = outputs.pooler_output
        return pooled_output
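

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module). The checkpoint paths below
# are placeholders; substitute the tokenizer and checkpoints this repo
# actually trains and saves with save_pretrained().
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import torch
    from transformers import BertTokenizer

    tokenizer = BertTokenizer.from_pretrained("path/to/simbert-checkpoint")

    # SimBertModel scores a pair packed into a single sequence
    # ([CLS] a [SEP] b [SEP]), i.e. the tokenizer's standard pair encoding.
    sim_model = SimBertModel.from_pretrained("path/to/simbert-checkpoint")
    sim_model.eval()
    pair = tokenizer("the weather is nice today", "it is a lovely day",
                     padding="max_length", max_length=128,
                     truncation=True, return_tensors="pt")
    with torch.no_grad():
        _, prob = sim_model(**pair)
    print(prob)  # softmax probability of class 1, used as the similarity score

    # CosSimBertModel expects the two sentences encoded separately to the same
    # fixed length and concatenated along the sequence dimension, so that
    # forward() can split them back in half.
    cos_model = CosSimBertModel.from_pretrained("path/to/cossimbert-checkpoint")
    cos_model.eval()
    enc_a = tokenizer("the weather is nice today", padding="max_length",
                      max_length=64, truncation=True, return_tensors="pt")
    enc_b = tokenizer("it is a lovely day", padding="max_length",
                      max_length=64, truncation=True, return_tensors="pt")
    batch = {
        key: torch.cat([enc_a[key], enc_b[key]], dim=-1)
        for key in ("input_ids", "token_type_ids", "attention_mask")
    }
    with torch.no_grad():
        _, cos = cos_model(**batch)
    print(cos)  # cosine similarity of the pooled embeddings, in [-1, 1]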