import evaluate
from evaluate.evaluation_suite import SubTask
# This is odd because the first dataset is multi-class and
# the second dataset is binary. The model I'm using has 4 labels
# and is fine-tuned on the first dataset.
# So what does it mean to evaluate this model on the second
# dataset?
metric = evaluate.combine(["accuracy"])
class Suite(evaluate.EvaluationSuite):
    def __init__(self, name):
        super().__init__(name)
        # Lowercase the text column before evaluation.
        self.preprocessor = lambda x: {"text": x["text"].lower()}
        self.suite = [
            SubTask(
                task_type="text-classification",
                data="hate_speech18",
                split="train[:1000]",
                args_for_task={
                    "metric": metric,
                    "input_column": "text",
                    "label_column": "label",
                    # Maps the pipeline's output label names to the
                    # dataset's label ids.
                    "label_mapping": {
                        "NO_HATE": 0.0,
                        "HATE": 1.0,
                        "RELATION": 1.0,
                        "IDK": 1.0
                    }
                }
            ),
            SubTask(
                task_type="text-classification",
                data="mteb/toxic_conversations_50k",
                split="test[:1000]",
                args_for_task={
                    "metric": metric,
                    "input_column": "text",
                    "label_column": "label",
                    "label_mapping": {
                        "NO_HATE": 0.0,
                        "HATE": 1.0
                    }
                }
            )
        ]
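
For context, this is a minimal sketch of how I run the suite, continuing from the definitions above. The model id "my-org/hate-speech-model" is a placeholder for my fine-tuned 4-label checkpoint; any Hub model id or text-classification pipeline would be passed the same way.

# Minimal usage sketch; "my-org/hate-speech-model" is a placeholder model id.
suite = Suite("hate-speech-suite")
results = suite.run("my-org/hate-speech-model")  # one result dict per SubTask
print(results)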