from unittest import TestCase

from multi_label_precision_recall_accuracy_fscore import MultiLabelPrecisionRecallAccuracyFscore


class MultiLabelPrecisionRecallAccuracyFscoreTest(TestCase):
    """
    Tests of MultiLabelPrecisionRecallAccuracyFscore in its default configuration.

    All of these tests are also run for the multiset configuration (the
    multiset test class below inherits from this one). Please keep that in
    mind and write every test here so that it is valid for both
    configurations: do not use the same label multiple times in one sample.
    """

    def setUp(self):
        """Create a fresh metric instance for every test."""
        self.multi_label_precision_recall_accuracy_fscore = (
            MultiLabelPrecisionRecallAccuracyFscore()
        )

    def _assert_scores(self, expected, predictions, references, **compute_kwargs):
        """
        Run ``compute`` and compare its result with ``expected``.

        The result must be a dict with exactly the keys of ``expected``.
        Values are compared with ``assertAlmostEqual`` rather than exact
        equality, because the expected scores are floating point expressions
        (e.g. ``2 / 3``) and an exact comparison would depend on the metric
        performing its arithmetic in precisely the same order as the test.

        :param expected: mapping from metric name to the expected score
        :param predictions: predicted labels, one list per sample
        :param references: reference labels, one list per sample
        :param compute_kwargs: additional keyword arguments passed to ``compute``
        """
        actual = self.multi_label_precision_recall_accuracy_fscore.compute(
            predictions=predictions,
            references=references,
            **compute_kwargs,
        )

        self.assertIsInstance(actual, dict)
        # No metric may be missing and no unexpected one may appear.
        self.assertEqual(set(expected), set(actual))
        for name, value in expected.items():
            self.assertAlmostEqual(value, actual[name], msg=f"metric {name!r}")

    def test_eok(self):
        """Predictions identical to the references score 1.0 everywhere."""
        self._assert_scores(
            expected={
                "precision": 1.0,
                "recall": 1.0,
                "accuracy": 1.0,
                "fscore": 1.0,
            },
            predictions=[
                [0, 1],
                [1, 2],
                [0, 1, 2],
            ],
            references=[
                [0, 1],
                [1, 2],
                [0, 1, 2],
            ],
        )

    def test_eok_string(self):
        """Same as test_eok, but the labels are strings."""
        self._assert_scores(
            expected={
                "precision": 1.0,
                "recall": 1.0,
                "accuracy": 1.0,
                "fscore": 1.0,
            },
            predictions=[
                ["0", "1"],
                ["1", "2"],
                ["0", "1", "2"],
            ],
            references=[
                ["0", "1"],
                ["1", "2"],
                ["0", "1", "2"],
            ],
        )

    def test_empty(self):
        """Empty predictions against empty references count as a perfect match."""
        self._assert_scores(
            expected={
                "precision": 1.0,
                "recall": 1.0,
                "accuracy": 1.0,
                "fscore": 1.0,
            },
            predictions=[
                [],
                [],
                [],
            ],
            references=[
                [],
                [],
                [],
            ],
        )

    def test_empty_reference(self):
        """Non-empty predictions against empty references score 0.0 everywhere."""
        self._assert_scores(
            expected={
                "precision": 0.0,
                "recall": 0.0,
                "accuracy": 0.0,
                "fscore": 0.0,
            },
            predictions=[
                [0, 1],
                [1, 2],
                [0, 1, 2],
            ],
            references=[
                [],
                [],
                [],
            ],
        )

    def test_empty_prediction(self):
        """Empty predictions against non-empty references score 0.0 everywhere."""
        self._assert_scores(
            expected={
                "precision": 0.0,
                "recall": 0.0,
                "accuracy": 0.0,
                "fscore": 0.0,
            },
            predictions=[
                [],
                [],
                [],
            ],
            references=[
                [0, 1],
                [1, 2],
                [0, 1, 2],
            ],
        )

    def test_completely_different(self):
        """Predictions sharing no label with the references score 0.0 everywhere."""
        self._assert_scores(
            expected={
                "precision": 0.0,
                "recall": 0.0,
                "accuracy": 0.0,
                "fscore": 0.0,
            },
            predictions=[
                [0, 1],
                [1, 2],
                [0, 1, 2],
            ],
            references=[
                [3, 4],
                [5, 6],
                [7, 8, 9],
            ],
        )

    def test_max_precision(self):
        """All predicted labels are correct, but half of the reference is missed."""
        self._assert_scores(
            expected={
                "precision": 1.0,
                "recall": 0.5,
                "accuracy": 0.5,
                "fscore": 2 / 3,
            },
            predictions=[
                [0, 1],
            ],
            references=[
                [0, 1, 2, 3],
            ],
        )

    def test_max_recall(self):
        """The whole reference is predicted, but half of the predictions are wrong."""
        self._assert_scores(
            expected={
                "precision": 0.5,
                "recall": 1.0,
                "accuracy": 0.5,
                "fscore": 2 / 3,
            },
            predictions=[
                [0, 1, 2, 3],
            ],
            references=[
                [0, 1],
            ],
        )

    def test_partial_match(self):
        """Single sample where one of two predicted labels is in the reference."""
        self._assert_scores(
            expected={
                "precision": 0.5,
                "recall": 0.5,
                "accuracy": 1 / 3,
                "fscore": 0.5,
            },
            predictions=[
                [0, 1],
            ],
            references=[
                [0, 2],
            ],
        )

    def test_partial_match_multi_sample(self):
        """Several samples, each matching its reference only partially."""
        self._assert_scores(
            expected={
                "precision": 2.5 / 3,
                "recall": 2 / 3,
                "accuracy": 0.5,
                "fscore": 2 * (2.5 / 3 * 2 / 3) / (2.5 / 3 + 2 / 3),
            },
            predictions=[
                [0, 1],
                [0, 1],
                [2, 3],
            ],
            references=[
                [0, 1, 2, 3],
                [0, 1, 2, 3],
                [2],
            ],
        )

    def test_beta(self):
        """Beta can be set as an attribute of the metric or passed to compute."""
        # beta configured on the metric object itself
        self.multi_label_precision_recall_accuracy_fscore.beta = 2
        self._assert_scores(
            expected={
                "precision": 2.5 / 3,
                "recall": 2 / 3,
                "accuracy": 0.5,
                "fscore": 5 * (2.5 / 3 * 2 / 3) / (4 * 2.5 / 3 + 2 / 3),
            },
            predictions=[
                [0, 1],
                [0, 1],
                [2, 3],
            ],
            references=[
                [0, 1, 2, 3],
                [0, 1, 2, 3],
                [2],
            ],
        )

        # beta passed to compute; the expected fscore corresponds to beta=3
        # even though the attribute is still set to 2
        self._assert_scores(
            expected={
                "precision": 2.5 / 3,
                "recall": 2 / 3,
                "accuracy": 0.5,
                "fscore": 10 * (2.5 / 3 * 2 / 3) / (9 * 2.5 / 3 + 2 / 3),
            },
            predictions=[
                [0, 1],
                [0, 1],
                [2, 3],
            ],
            references=[
                [0, 1, 2, 3],
                [0, 1, 2, 3],
                [2],
            ],
            beta=3,
        )


class MultiLabelPrecisionRecallAccuracyFscoreTestMultiset(MultiLabelPrecisionRecallAccuracyFscoreTest):
    """
    Tests of the "multiset" configuration.

    Inherits every test of the default configuration and adds tests in which
    the same label occurs multiple times within one sample.
    """

    def setUp(self):
        """Create a fresh metric instance with the multiset configuration."""
        self.multi_label_precision_recall_accuracy_fscore = (
            MultiLabelPrecisionRecallAccuracyFscore(config_name="multiset")
        )

    def test_multiset_eok(self):
        """Same labels with the same counts match regardless of their order."""
        self._assert_scores(
            expected={
                "precision": 1.0,
                "recall": 1.0,
                "accuracy": 1.0,
                "fscore": 1.0,
            },
            predictions=[
                [0, 1, 1],
                [1, 2, 2],
                [0, 1, 2, 1],
            ],
            references=[
                [1, 0, 1],
                [1, 2, 2],
                [0, 1, 1, 2],
            ],
        )

    def test_multiset_partial_match(self):
        """Every predicted occurrence is correct, but half of the reference is missed."""
        self._assert_scores(
            expected={
                "precision": 1.0,
                "recall": 0.5,
                "accuracy": 0.5,
                "fscore": 2 / 3,
            },
            predictions=[
                [0, 1, 1],
            ],
            references=[
                [1, 0, 1, 1, 0, 0],
            ],
        )

    def test_multiset_partial_match_multi_sample(self):
        """Two samples with repeated labels, each matching only partially."""
        # each term below is the expected score of one sample
        p = (1 + 2 / 3) / 2
        r = (3 / 4 + 1) / 2

        self._assert_scores(
            expected={
                "precision": p,
                "recall": r,
                "accuracy": (3 / 4 + 2 / 3) / 2,
                "fscore": 2 * p * r / (p + r),
            },
            predictions=[
                [0, 1, 1],
                [1, 2, 2],
            ],
            references=[
                [1, 0, 1, 1],
                [1, 2],
            ],
        )