Spaces:

alwayse
/

MMD_MP_Text_Dection

Sleeping

App Files Files Community

MMD_MP_Text_Dection / dataTST.py

alwayse

Upload 9 files

d0e1f8b about 1 year ago

raw

history blame contribute delete

2.13 kB

	import numpy as np
	import torch
	import random
	from meta_train import mmdPreModel
	from collections import namedtuple
	import joblib
	from transformers import RobertaTokenizer, RobertaModel


	def api_init(*, model_path_api='.', gpu_using=False, num_ref=5000, seed=0):
	    """Load every artifact the detector needs and return them as one tuple.

	    With no arguments this behaves like the original hard-coded version:
	    artifacts are read from the current directory, everything stays on the
	    CPU, 5000 reference features are sampled and all RNGs are seeded with 0.

	    Args:
	        model_path_api: Directory containing ``feature_ref_for_test.pt``,
	            ``logistic_regression_model.pkl`` and ``net.pt``.
	        gpu_using: If true, place the models and tensors on ``cuda:0``;
	            otherwise keep them on the CPU.
	        num_ref: Maximum number of reference feature rows to sample.
	        seed: Seed applied to ``random``, NumPy and PyTorch (CPU and CUDA).

	    Returns:
	        A 9-tuple ``(base_model, base_tokenizer, net, feature_ref, sigma,
	        sigma0_u, ep, loaded_model, DEVICE)`` where

	        * ``base_model`` / ``base_tokenizer`` are the pretrained
	          ``roberta-base-openai-detector`` RoBERTa model (hidden states
	          enabled) and its tokenizer,
	        * ``net`` is the ``mmdPreModel`` with the weights stored under the
	          ``'net'`` key of ``net.pt``,
	        * ``feature_ref`` is a random subset of at most ``num_ref`` rows of
	          the stored reference features,
	        * ``sigma``, ``sigma0_u`` and ``ep`` are the tensors stored under
	          those keys in ``net.pt`` (presumably the trained MMD kernel
	          parameters -- confirm against ``meta_train``),
	        * ``loaded_model`` is the object unpickled from
	          ``logistic_regression_model.pkl``,
	        * ``DEVICE`` is the ``torch.device`` everything was moved to.
	    """
	    # Seed every RNG in play and pin cuDNN to deterministic kernels so the
	    # reference subset drawn below is reproducible across start-ups.
	    random.seed(seed)
	    np.random.seed(seed)
	    torch.manual_seed(seed)
	    torch.cuda.manual_seed(seed)
	    torch.cuda.manual_seed_all(seed)
	    torch.backends.cudnn.benchmark = False
	    torch.backends.cudnn.deterministic = True

	    model_name = 'roberta-base-openai-detector'
	    token_num, hidden_size = 100, 768

	    Config = namedtuple('Config', ['in_dim', 'hid_dim', 'dropout', 'out_dim', 'token_num'])
	    config = Config(
	        in_dim=hidden_size,
	        token_num=token_num,
	        hid_dim=512,
	        dropout=0.2,
	        out_dim=300,
	    )

	    net = mmdPreModel(config=config, num_mlp=0, transformer_flag=True, num_hidden_layers=1)

	    # Locations of the stored features and models.
	    feature_ref_for_test_filename = f'{model_path_api}/feature_ref_for_test.pt'
	    model_filename = f'{model_path_api}/logistic_regression_model.pkl'
	    net_filename = f'{model_path_api}/net.pt'

	    # Everything is first materialised on the CPU; the move to the target
	    # device happens in one place further down.
	    # NOTE: torch.load / joblib.load unpickle their input -- only point
	    # model_path_api at trusted files.
	    cpu = torch.device('cpu')
	    load_ref_data = torch.load(feature_ref_for_test_filename, map_location=cpu)
	    loaded_model = joblib.load(model_filename)
	    checkpoint = torch.load(net_filename, map_location=cpu)
	    net.load_state_dict(checkpoint['net'])
	    sigma, sigma0_u, ep = checkpoint['sigma'], checkpoint['sigma0_u'], checkpoint['ep']

	    # Pretrained RoBERTa backbone and tokenizer, cached locally.
	    cache_dir = ".cache"
	    base_tokenizer = RobertaTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
	    base_model = RobertaModel.from_pretrained(model_name, output_hidden_states=True, cache_dir=cache_dir)

	    # Moving to the CPU device is a no-op, so the transfer can be done
	    # unconditionally instead of being guarded by gpu_using.
	    DEVICE = torch.device("cuda:0" if gpu_using else "cpu")
	    net = net.to(DEVICE)
	    sigma, sigma0_u, ep = sigma.to(DEVICE), sigma0_u.to(DEVICE), ep.to(DEVICE)
	    load_ref_data = load_ref_data.to(DEVICE)
	    base_model = base_model.to(DEVICE)

	    # Slice the permutation before indexing: this picks the same rows as
	    # permuting the whole tensor and truncating, without the full-size copy.
	    chosen_rows = np.random.permutation(load_ref_data.shape[0])[:num_ref]
	    feature_ref = load_ref_data[chosen_rows]

	    return base_model, base_tokenizer, net, feature_ref, sigma, sigma0_u, ep, loaded_model, DEVICE