danieldux committed on
Commit
21292ae
·
1 Parent(s): 535f418

Remove duplicate isco_ham

Browse files
Files changed (1) hide show
  1. isco_ham.py +0 -137
isco_ham.py DELETED
@@ -1,137 +0,0 @@
1
- # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- """ISCO-08 Hierarchical Accuracy Measure."""
15
-
16
- import evaluate
17
- import datasets
18
- import ham
19
- import isco
20
-
21
-
22
- # TODO: Add BibTeX citation
23
- _CITATION = """
24
- @article{scikit-learn,
25
- title={Scikit-learn: Machine Learning in {P}ython},
26
- author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
27
- and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
28
- and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
29
- Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
30
- journal={Journal of Machine Learning Research},
31
- volume={12},
32
- pages={2825--2830},
33
- year={2011}
34
- }
35
- """
36
-
37
- _DESCRIPTION = """
38
- The ISCO-08 Hierarchical Accuracy Measure is an implementation of the measure described in [Functional Annotation of Genes Using Hierarchical Text Categorization](https://www.researchgate.net/publication/44046343_Functional_Annotation_of_Genes_Using_Hierarchical_Text_Categorization) (Kiritchenko, Svetlana and Famili, Fazel. 2005) and adapted for the ISCO-08 classification scheme by the International Labour Organization.
39
- """
40
-
41
- _KWARGS_DESCRIPTION = """
42
- Calculates hierarchical precision, hierarchical recall and hierarchical F1 given a list of reference codes and predicted codes from the ISCO-08 taxonomy by the International Labour Organization.
43
-
44
- Args:
45
- - references (List[str]): List of ISCO-08 reference codes. Each reference code should be a single token, 4-digit ISCO-08 code string.
46
- - predictions (List[str]): List of machine predicted or human assigned ISCO-08 codes to score. Each prediction should be a single token, 4-digit ISCO-08 code string.
47
-
48
- Returns:
49
- - hierarchical_precision (`float` or `int`): Hierarchical precision score. Minimum possible value is 0. Maximum possible value is 1.0. A higher score means higher accuracy.
50
- - hierarchical_recall: Hierarchical recall score. Minimum possible value is 0. Maximum possible value is 1.0. A higher score means higher accuracy.
51
- - hierarchical_fmeasure: Hierarchical F1 score. Minimum possible value is 0. Maximum possible value is 1.0. A higher score means higher accuracy.
52
-
53
- Examples:
54
- Example 1
55
-
56
- >>> hierarchical_accuracy_metric = evaluate.load("ham")
57
- >>> results = hierarchical_accuracy_metric.compute(references=["1111", "1112", "1113", "1114"], predictions=["1111", "1113", "1120", "1211"])
58
- >>> print(results)
59
- {
60
- 'accuracy': 0.25,
61
- 'hierarchical_precision': 0.7142857142857143,
62
- 'hierarchical_recall': 0.5,
63
- 'hierarchical_fmeasure': 0.588235294117647
64
- }
65
- """
66
-
67
- # TODO: Define external resources urls if needed
68
- ISCO_CSV_MIRROR_URL = (
69
- "https://storage.googleapis.com/isco-public/tables/ISCO_structure.csv"
70
- )
71
- ILO_ISCO_CSV_URL = (
72
- "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv"
73
- )
74
-
75
-
76
- @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
77
- class ISCO_Hierarchical_Accuracy(evaluate.Metric):
78
- """The ISCO-08 Hierarchical Accuracy Measure"""
79
-
80
- def _info(self):
81
- # TODO: Specifies the evaluate.EvaluationModuleInfo object
82
- return evaluate.MetricInfo(
83
- # This is the description that will appear on the modules page.
84
- module_type="metric",
85
- description=_DESCRIPTION,
86
- citation=_CITATION,
87
- inputs_description=_KWARGS_DESCRIPTION,
88
- # This defines the format of each prediction and reference
89
- features=datasets.Features(
90
- {
91
- "predictions": datasets.Value("string"),
92
- "references": datasets.Value("string"),
93
- }
94
- ),
95
- # TODO: Homepage of the module for documentation
96
- homepage="http://module.homepage",
97
- # TODO: Additional links to the codebase or references
98
- codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
99
- reference_urls=["http://path.to.reference.url/new_module"],
100
- )
101
-
102
- def _download_and_prepare(self, dl_manager):
103
- """Download the ISCO-08 structure CSV from the mirror URL (ISCO_CSV_MIRROR_URL) and build the hierarchy dictionary from it."""
104
- isco_csv = dl_manager.download_and_extract(ISCO_CSV_MIRROR_URL)
105
- print(f"ISCO CSV file downloaded")
106
- self.isco_hierarchy = isco.create_hierarchy_dict(isco_csv)
107
- print("ISCO hierarchy dictionary created")
108
- print(self.isco_hierarchy)
109
-
110
- def _compute(self, predictions, references):
111
- """Returns the accuracy together with the hierarchical precision, recall and F-measure."""
112
- # Convert the inputs to strings
113
- predictions = [str(p) for p in predictions]
114
- references = [str(r) for r in references]
115
-
116
- # Calculate accuracy
117
- accuracy = sum(i == j for i, j in zip(predictions, references)) / len(
118
- predictions
119
- )
120
- print(f"Accuracy: {accuracy}")
121
-
122
- # Calculate hierarchical precision, recall and f-measure
123
- hierarchy = self.isco_hierarchy
124
- hP, hR = ham.calculate_hierarchical_precision_recall(
125
- references, predictions, hierarchy
126
- )
127
- hF = ham.hierarchical_f_measure(hP, hR)
128
- print(
129
- f"Hierarchical Precision: {hP}, Hierarchical Recall: {hR}, Hierarchical F-measure: {hF}"
130
- )
131
-
132
- return {
133
- "accuracy": accuracy,
134
- "hierarchical_precision": hP,
135
- "hierarchical_recall": hR,
136
- "hierarchical_fmeasure": hF,
137
- }