mdocekal committed
Commit 5ae6761 • 1 Parent(s): 728e0e5

Init commit containing an implementation of the example-based evaluation metrics for multi-label classification presented in Zhang and Zhou (2014), and a multiset variant.

README.md CHANGED
@@ -13,16 +13,49 @@ pinned: false
  ---
 
  # Metric Card for Multi Label Precision Recall Accuracy Fscore
 
- ***Module Card Instructions:*** *Fill out the following subsections. Feel free to take a look at existing metric cards if you'd like examples.*
 
- ## Metric Description
- *Give a brief overview of this metric, including what task(s) it is usually used for, if any.*
+ Implementation of the example-based evaluation metrics for multi-label classification presented in Zhang and Zhou (2014).
 
- ## How to Use
- *Give general statement of how to use the metric*
-
- *Provide simplest possible example for using the metric*
+ ## How to Use
+
+     >>> multi_label_precision_recall_accuracy_fscore = evaluate.load("mdocekal/multi_label_precision_recall_accuracy_fscore")
+     >>> results = multi_label_precision_recall_accuracy_fscore.compute(
+             predictions=[
+                 ["0", "1"],
+                 ["1", "2"],
+                 ["0", "1", "2"],
+             ],
+             references=[
+                 ["0", "1"],
+                 ["1", "2"],
+                 ["0", "1", "2"],
+             ]
+         )
+     >>> print(results)
+     {
+         "precision": 1.0,
+         "recall": 1.0,
+         "accuracy": 1.0,
+         "fscore": 1.0
+     }
+
+ There is also a multiset configuration available, which allows calculating the metrics for multi-label classification with repeated labels.
+ It uses the same definitions as in the previous case, but works with multisets of labels; multiset intersection, union, and cardinality are used instead.
+
+     >>> results = multi_label_precision_recall_accuracy_fscore.compute(
+             predictions=[
+                 [0, 1, 1]
+             ],
+             references=[
+                 [1, 0, 1, 1, 0, 0],
+             ]
+         )
+     >>> print(results)
+     {
+         "precision": 1.0,
+         "recall": 0.5,
+         "accuracy": 0.5,
+         "fscore": 0.6666666666666666
+     }
 
  ### Inputs
  *List all input arguments in the format below*
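The multiset numbers in the README example above can be checked by hand with `collections.Counter`, which is the same mechanism the implementation below uses: `&` keeps the minimum count per label (multiset intersection) and `|` the maximum (multiset union). A minimal standalone sketch (not part of the commit) reproducing the second README example:

    from collections import Counter

    prediction = Counter([0, 1, 1])          # {0: 1, 1: 2}, cardinality 3
    reference = Counter([1, 0, 1, 1, 0, 0])  # {0: 3, 1: 3}, cardinality 6

    intersection = sum((prediction & reference).values())  # min counts: 1 + 2 = 3
    union = sum((prediction | reference).values())         # max counts: 3 + 3 = 6

    precision = intersection / sum(prediction.values())    # 3 / 3 = 1.0
    recall = intersection / sum(reference.values())        # 3 / 6 = 0.5
    accuracy = intersection / union                        # 3 / 6 = 0.5
    fscore = 2 * precision * recall / (precision + recall) # 0.666...

    print(precision, recall, accuracy, fscore)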
multi_label_precision_recall_accuracy_fscore.py CHANGED
@@ -11,58 +11,79 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- """TODO: Add a description here."""
+
+ from collections import Counter
+ from typing import Optional, Union
 
  import evaluate
  import datasets
 
 
- # TODO: Add BibTeX citation
  _CITATION = """\
- @InProceedings{huggingface:module,
- title = {A great new module},
- authors={huggingface, Inc.},
- year={2020}
+ @article{Zhang2014ARO,
+ title={A Review on Multi-Label Learning Algorithms},
+ author={Min-Ling Zhang and Zhi-Hua Zhou},
+ journal={IEEE Transactions on Knowledge and Data Engineering},
+ year={2014},
+ volume={26},
+ pages={1819-1837},
+ url={https://api.semanticscholar.org/CorpusID:1008003}
  }
  """
 
- # TODO: Add description of the module here
  _DESCRIPTION = """\
- This new module is designed to solve this great ML task and is crafted with a lot of care.
+ Implementation of the example-based evaluation metrics for multi-label classification presented in Zhang and Zhou (2014).
  """
 
-
- # TODO: Add description of the arguments of the module here
  _KWARGS_DESCRIPTION = """
- Calculates how good are predictions given some references, using certain scores
+ Implementation of the example-based evaluation metrics for multi-label classification presented in Zhang and Zhou (2014).
  Args:
      predictions: list of predictions to score. Each prediction
-         should be a string with tokens separated by spaces.
+         should be a list of predicted labels.
      references: list of references, one for each prediction. Each
-         reference should be a string with tokens separated by spaces.
+         reference should be a list of reference labels.
  Returns:
-     accuracy: description of the first score,
-     another_score: description of the second score,
+     precision
+     recall
+     accuracy
+     fscore
  Examples:
-     Examples should be written in doctest format, and should illustrate how
-     to use the function.
 
-     >>> my_new_module = evaluate.load("my_new_module")
-     >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
+     >>> multi_label_precision_recall_accuracy_fscore = evaluate.load("mdocekal/multi_label_precision_recall_accuracy_fscore")
+     >>> results = multi_label_precision_recall_accuracy_fscore.compute(
+             predictions=[
+                 ["0", "1"],
+                 ["1", "2"],
+                 ["0", "1", "2"],
+             ],
+             references=[
+                 ["0", "1"],
+                 ["1", "2"],
+                 ["0", "1", "2"],
+             ]
+         )
      >>> print(results)
-     {'accuracy': 1.0}
+     {
+         "precision": 1.0,
+         "recall": 1.0,
+         "accuracy": 1.0,
+         "fscore": 1.0
+     }
  """
 
- # TODO: Define external resources urls if needed
- BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-
 
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
  class MultiLabelPrecisionRecallAccuracyFscore(evaluate.Metric):
-     """TODO: Short description of my evaluation module."""
+     """
+     Implementation of the example-based evaluation metrics for multi-label classification presented in Zhang and Zhou (2014).
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.beta = kwargs.get("beta", 1.0)
+         self.use_multiset = self.config_name == "multiset"
 
      def _info(self):
-         # TODO: Specifies the evaluate.EvaluationModuleInfo object
          return evaluate.MetricInfo(
              # This is the description that will appear on the modules page.
              module_type="metric",
@@ -70,26 +91,87 @@ class MultiLabelPrecisionRecallAccuracyFscore(evaluate.Metric):
              citation=_CITATION,
              inputs_description=_KWARGS_DESCRIPTION,
              # This defines the format of each prediction and reference
-             features=datasets.Features({
-                 'predictions': datasets.Value('int64'),
-                 'references': datasets.Value('int64'),
-             }),
-             # Homepage of the module for documentation
-             homepage="http://module.homepage",
-             # Additional links to the codebase or references
-             codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-             reference_urls=["http://path.to.reference.url/new_module"]
+             features=[
+                 datasets.Features({
+                     'predictions': datasets.Sequence(datasets.Value('int64')),
+                     'references': datasets.Sequence(datasets.Value('int64')),
+                 }),
+                 datasets.Features({
+                     'predictions': datasets.Sequence(datasets.Value('string')),
+                     'references': datasets.Sequence(datasets.Value('string')),
+                 }),
+             ]
          )
 
-     def _download_and_prepare(self, dl_manager):
-         """Optional: download external resources useful to compute the scores"""
-         # TODO: Download external resources if needed
-         pass
+     def eval_example(self, prediction, reference):
+         if self.use_multiset:
+             prediction = Counter(prediction)
+             reference = Counter(reference)
+
+             intersection_cardinality = sum((prediction & reference).values())
+             union_cardinality = sum((prediction | reference).values())
+
+             prediction_cardinality = sum(prediction.values())
+             reference_cardinality = sum(reference.values())
+         else:
+             prediction = set(prediction)
+             reference = set(reference)
+
+             intersection_cardinality = len(prediction & reference)
+             union_cardinality = len(prediction | reference)
+
+             prediction_cardinality = len(prediction)
+             reference_cardinality = len(reference)
+
+         precision = intersection_cardinality / prediction_cardinality if prediction_cardinality > 0 else 0
+         recall = intersection_cardinality / reference_cardinality if reference_cardinality > 0 else 0
+         accuracy = intersection_cardinality / union_cardinality if union_cardinality > 0 else 0
+
+         return precision, recall, accuracy
 
-     def _compute(self, predictions, references):
-         """Returns the scores"""
-         # TODO: Compute the different scores of the module
-         accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+     def _compute(self, predictions: list[list[Union[int, str]]], references: list[list[Union[int, str]]],
+                  beta: Optional[float] = None) -> dict[str, float]:
+         """
+         Computes the metrics for a list of predictions and references.
+
+         Args:
+             predictions: list of predictions to score. Each prediction
+                 should be a list of predicted labels.
+             references: list of references, one for each prediction. Each
+                 reference should be a list of reference labels.
+             beta: beta value for the F-score calculation;
+                 if None, the default value is used.
+         Returns: dict with
+             precision
+             recall
+             accuracy
+             fscore
+         """
+         assert len(predictions) == len(references), "Predictions and references must have the same length"
+         if beta is None:
+             beta = self.beta
+
+         precision, recall, accuracy = 0, 0, 0
+
+         for p, r in zip(predictions, references):
+             p, r, a = self.eval_example(p, r)
+             precision += p
+             recall += r
+             accuracy += a
+
+         precision /= len(predictions)
+         recall /= len(predictions)
+         accuracy /= len(predictions)
+
+         if precision + recall == 0:
+             fscore = 0.0
+         else:
+             fscore = (1 + beta**2) * precision * recall / (beta**2 * precision + recall)
 
          return {
+             "precision": precision,
+             "recall": recall,
              "accuracy": accuracy,
-         }
+             "fscore": fscore
+         }
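Note that `_compute` above first averages the per-example precision, recall, and accuracy over all examples, and only then combines the averaged precision and recall into an F-beta score. A small standalone sketch (not part of the commit) of that aggregation for the set-based configuration; the inputs and expected values mirror the `test_partial_match_multi_sample` case in tests.py below:

    def example_scores(pred, ref):
        # per-example scores, matching the set-based branch of eval_example
        pred, ref = set(pred), set(ref)
        inter, union = len(pred & ref), len(pred | ref)
        return (inter / len(pred) if pred else 0,
                inter / len(ref) if ref else 0,
                inter / union if union else 0)

    pairs = [([0, 1], [0, 1, 2, 3]), ([0, 1], [0, 1, 2, 3]), ([2, 3], [2])]
    per_example = [example_scores(p, r) for p, r in pairs]

    # average each score over the examples, then apply F-beta to the averaged precision/recall
    precision, recall, accuracy = (sum(col) / len(pairs) for col in zip(*per_example))
    beta = 1.0
    fscore = (1 + beta**2) * precision * recall / (beta**2 * precision + recall)
    print(precision, recall, accuracy, fscore)  # 0.8333..., 0.6666..., 0.5, 0.7407...

The beta value can be overridden per call (the tests pass `beta=3` to `compute`) or by changing the metric's `beta` attribute, which defaults to 1.0.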
requirements.txt CHANGED
@@ -1 +1,2 @@
- git+https://github.com/huggingface/evaluate@main
+ evaluate
+ datasets
tests.py CHANGED
@@ -1,17 +1,333 @@
- test_cases = [
-     {
-         "predictions": [0, 0],
-         "references": [1, 1],
-         "result": {"metric_score": 0}
-     },
-     {
-         "predictions": [1, 1],
-         "references": [1, 1],
-         "result": {"metric_score": 1}
-     },
-     {
-         "predictions": [1, 0],
-         "references": [1, 1],
-         "result": {"metric_score": 0.5}
-     }
- ]
+ from unittest import TestCase
+
+ from multi_label_precision_recall_accuracy_fscore import MultiLabelPrecisionRecallAccuracyFscore
+
+
+ class MultiLabelPrecisionRecallAccuracyFscoreTest(TestCase):
+     """
+     All of these tests are also used for the multiset configuration. So please mind this and write the tests in a
+     way that is valid for both configurations (do not use the same label multiple times).
+     """
+     def setUp(self):
+         self.multi_label_precision_recall_accuracy_fscore = MultiLabelPrecisionRecallAccuracyFscore()
+
+     def test_eok(self):
+         self.assertDictEqual(
+             {
+                 "precision": 1.0,
+                 "recall": 1.0,
+                 "accuracy": 1.0,
+                 "fscore": 1.0
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1],
+                     [1, 2],
+                     [0, 1, 2],
+                 ],
+                 references=[
+                     [0, 1],
+                     [1, 2],
+                     [0, 1, 2],
+                 ]
+             )
+         )
+
+     def test_eok_string(self):
+         self.assertDictEqual(
+             {
+                 "precision": 1.0,
+                 "recall": 1.0,
+                 "accuracy": 1.0,
+                 "fscore": 1.0
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     ["0", "1"],
+                     ["1", "2"],
+                     ["0", "1", "2"],
+                 ],
+                 references=[
+                     ["0", "1"],
+                     ["1", "2"],
+                     ["0", "1", "2"],
+                 ]
+             )
+         )
+
+     def test_empty(self):
+         self.assertDictEqual(
+             {
+                 "precision": 0.0,
+                 "recall": 0.0,
+                 "accuracy": 0.0,
+                 "fscore": 0.0
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [],
+                     [],
+                     [],
+                 ],
+                 references=[
+                     [],
+                     [],
+                     [],
+                 ]
+             )
+         )
+
+     def test_empty_reference(self):
+         self.assertDictEqual(
+             {
+                 "precision": 0.0,
+                 "recall": 0.0,
+                 "accuracy": 0.0,
+                 "fscore": 0.0
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1],
+                     [1, 2],
+                     [0, 1, 2],
+                 ],
+                 references=[
+                     [],
+                     [],
+                     [],
+                 ]
+             )
+         )
+
+     def test_empty_prediction(self):
+         self.assertDictEqual(
+             {
+                 "precision": 0.0,
+                 "recall": 0.0,
+                 "accuracy": 0.0,
+                 "fscore": 0.0
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [],
+                     [],
+                     [],
+                 ],
+                 references=[
+                     [0, 1],
+                     [1, 2],
+                     [0, 1, 2],
+                 ]
+             )
+         )
+
+     def test_completely_different(self):
+         self.assertDictEqual(
+             {
+                 "precision": 0.0,
+                 "recall": 0.0,
+                 "accuracy": 0.0,
+                 "fscore": 0.0
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1],
+                     [1, 2],
+                     [0, 1, 2],
+                 ],
+                 references=[
+                     [3, 4],
+                     [5, 6],
+                     [7, 8, 9],
+                 ]
+             )
+         )
+
+     def test_max_precision(self):
+         self.assertDictEqual(
+             {
+                 "precision": 1.0,
+                 "recall": 0.5,
+                 "accuracy": 0.5,
+                 "fscore": 2/3
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1]
+                 ],
+                 references=[
+                     [0, 1, 2, 3]
+                 ]
+             )
+         )
+
+     def test_max_recall(self):
+         self.assertDictEqual(
+             {
+                 "precision": 0.5,
+                 "recall": 1.0,
+                 "accuracy": 0.5,
+                 "fscore": 2/3
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1, 2, 3]
+                 ],
+                 references=[
+                     [0, 1]
+                 ]
+             )
+         )
+
+     def test_partial_match(self):
+         self.assertDictEqual(
+             {
+                 "precision": 0.5,
+                 "recall": 0.5,
+                 "accuracy": 1/3,
+                 "fscore": 0.5
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1]
+                 ],
+                 references=[
+                     [0, 2]
+                 ]
+             )
+         )
+
+     def test_partial_match_multi_sample(self):
+         self.assertDictEqual(
+             {
+                 "precision": 2.5/3,
+                 "recall": 2/3,
+                 "accuracy": 0.5,
+                 "fscore": 2*(2.5/3 * 2/3) / (2.5/3 + 2/3)
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1],
+                     [0, 1],
+                     [2, 3]
+                 ],
+                 references=[
+                     [0, 1, 2, 3],
+                     [0, 1, 2, 3],
+                     [2]
+                 ]
+             )
+         )
+
+     def test_beta(self):
+         self.multi_label_precision_recall_accuracy_fscore.beta = 2
+         self.assertDictEqual(
+             {
+                 "precision": 2.5/3,
+                 "recall": 2/3,
+                 "accuracy": 0.5,
+                 "fscore": 5*(2.5/3 * 2/3) / (4*2.5/3 + 2/3)
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1],
+                     [0, 1],
+                     [2, 3]
+                 ],
+                 references=[
+                     [0, 1, 2, 3],
+                     [0, 1, 2, 3],
+                     [2]
+                 ]
+             )
+         )
+         self.assertDictEqual(
+             {
+                 "precision": 2.5 / 3,
+                 "recall": 2 / 3,
+                 "accuracy": 0.5,
+                 "fscore": 10 * (2.5 / 3 * 2 / 3) / (9 * 2.5 / 3 + 2 / 3)
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1],
+                     [0, 1],
+                     [2, 3]
+                 ],
+                 references=[
+                     [0, 1, 2, 3],
+                     [0, 1, 2, 3],
+                     [2]
+                 ],
+                 beta=3
+             )
+         )
+
+
+ class MultiLabelPrecisionRecallAccuracyFscoreTestMultiset(MultiLabelPrecisionRecallAccuracyFscoreTest):
+     def setUp(self):
+         self.multi_label_precision_recall_accuracy_fscore = MultiLabelPrecisionRecallAccuracyFscore(config_name="multiset")
+
+     def test_multiset_eok(self):
+         self.assertDictEqual(
+             {
+                 "precision": 1.0,
+                 "recall": 1.0,
+                 "accuracy": 1.0,
+                 "fscore": 1.0
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1, 1],
+                     [1, 2, 2],
+                     [0, 1, 2, 1],
+                 ],
+                 references=[
+                     [1, 0, 1],
+                     [1, 2, 2],
+                     [0, 1, 1, 2],
+                 ]
+             )
+         )
+
+     def test_multiset_partial_match(self):
+         self.assertDictEqual(
+             {
+                 "precision": 1.0,
+                 "recall": 0.5,
+                 "accuracy": 0.5,
+                 "fscore": 2/3
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1, 1]
+                 ],
+                 references=[
+                     [1, 0, 1, 1, 0, 0],
+                 ]
+             )
+         )
+
+     def test_multiset_partial_match_multi_sample(self):
+         p = (1 + 2/3) / 2
+         r = (3/4 + 1) / 2
+
+         self.assertDictEqual(
+             {
+                 "precision": p,
+                 "recall": r,
+                 "accuracy": (3/4 + 2/3) / 2,
+                 "fscore": 2*p*r / (p + r)
+             },
+             self.multi_label_precision_recall_accuracy_fscore.compute(
+                 predictions=[
+                     [0, 1, 1],
+                     [1, 2, 2]
+                 ],
+                 references=[
+                     [1, 0, 1, 1],
+                     [1, 2],
+                 ]
+             )
+         )
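The multiset subclass above reuses every base test simply by constructing the metric with `config_name="multiset"`; the whole suite can be run with the standard unittest runner from the repository root (e.g. `python -m unittest tests`). For a quick check outside the test harness, a minimal sketch (not part of the commit) mirroring `test_multiset_partial_match`:

    from multi_label_precision_recall_accuracy_fscore import MultiLabelPrecisionRecallAccuracyFscore

    # the "multiset" configuration switches eval_example to Counter-based (multiset) arithmetic
    metric = MultiLabelPrecisionRecallAccuracyFscore(config_name="multiset")
    results = metric.compute(
        predictions=[[0, 1, 1]],
        references=[[1, 0, 1, 1, 0, 0]],
    )
    print(results)  # per the test above: precision 1.0, recall 0.5, accuracy 0.5, fscore 2/3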