Spaces:

danieldux
/

isco_hierarchical_accuracy

Build error

App Files Files Community

danieldux committed on Mar 4, 2024

Commit

21292ae

1 Parent(s): 535f418

Remove duplicate isco_ham

Browse files

Files changed (1) hide show

isco_ham.py +0 -137

isco_ham.py DELETED Viewed

@@ -1,137 +0,0 @@
-# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""ISCO-08 Hierarchical Accuracy Measure."""
-import evaluate
-import datasets
-import ham
-import isco
-# TODO: Add BibTeX citation
-_CITATION = """
-@article{scikit-learn,
-  title={Scikit-learn: Machine Learning in {P}ython},
-  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
-         and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
-         and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
-         Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
-  journal={Journal of Machine Learning Research},
-  volume={12},
-  pages={2825--2830},
-  year={2011}
-}
-"""
-_DESCRIPTION = """
-The ISCO-08 Hierarchical Accuracy Measure is an implementation of the measure described in [Functional Annotation of Genes Using Hierarchical Text Categorization](https://www.researchgate.net/publication/44046343_Functional_Annotation_of_Genes_Using_Hierarchical_Text_Categorization) (Kiritchenko, Svetlana and Famili, Fazel. 2005) and adapted for the ISCO-08 classification scheme by the International Labour Organization.
-"""
-_KWARGS_DESCRIPTION = """
-Calculates hierarchical precision, hierarchical recall and hierarchical F1 given a list of reference codes and predicted codes from the ISCO-08 taxonomy by the International Labour Organization.
-Args:
-    - references (List[str]): List of ISCO-08 reference codes. Each reference code should be a single token, 4-digit ISCO-08 code string.
-    - predictions (List[str]): List of machine predicted or human assigned ISCO-08 codes to score. Each prediction should be a single token, 4-digit ISCO-08 code string.
-Returns:
-    - hierarchical_precision (`float` or `int`): Hierarchical precision score. Minimum possible value is 0. Maximum possible value is 1.0. A higher score means higher accuracy.
-    - hierarchical_recall: Hierarchical recall score. Minimum possible value is 0. Maximum possible value is 1.0. A higher score means higher accuracy.
-    - hierarchical_fmeasure: Hierarchical F1 score. Minimum possible value is 0. Maximum possible value is 1.0. A higher score means higher accuracy.
-Examples:
-    Example 1
-    >>> hierarchical_accuracy_metric = evaluate.load("ham")
-    >>> results = hierarchical_accuracy_metric.compute(references=["1111", "1112", "1113", "1114"], predictions=["1111", "1113", "1120", "1211"])
-    >>> print(results)
-    {
-        'accuracy': 0.25,
-        'hierarchical_precision': 0.7142857142857143,
-        'hierarchical_recall': 0.5,
-        'hierarchical_fmeasure': 0.588235294117647
-    }
-"""
-# TODO: Define external resources urls if needed
-ISCO_CSV_MIRROR_URL = (
-    "https://storage.googleapis.com/isco-public/tables/ISCO_structure.csv"
-)
-ILO_ISCO_CSV_URL = (
-    "https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv"
-)
-@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
-class ISCO_Hierarchical_Accuracy(evaluate.Metric):
-    """The ISCO-08 Hierarchical Accuracy Measure"""
-    def _info(self):
-        # TODO: Specifies the evaluate.EvaluationModuleInfo object
-        return evaluate.MetricInfo(
-            # This is the description that will appear on the modules page.
-            module_type="metric",
-            description=_DESCRIPTION,
-            citation=_CITATION,
-            inputs_description=_KWARGS_DESCRIPTION,
-            # This defines the format of each prediction and reference
-            features=datasets.Features(
-                {
-                    "predictions": datasets.Value("string"),
-                    "references": datasets.Value("string"),
-                }
-            ),
-            # TODO: Homepage of the module for documentation
-            homepage="http://module.homepage",
-            # TODO: Additional links to the codebase or references
-            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-            reference_urls=["http://path.to.reference.url/new_module"],
-        )
-    def _download_and_prepare(self, dl_manager):
-        """Download external ISCO-08 csv file from the ILO website for creating the hierarchy dictionary."""
-        isco_csv = dl_manager.download_and_extract(ISCO_CSV_MIRROR_URL)
-        print(f"ISCO CSV file downloaded")
-        self.isco_hierarchy = isco.create_hierarchy_dict(isco_csv)
-        print("ISCO hierarchy dictionary created")
-        print(self.isco_hierarchy)
-    def _compute(self, predictions, references):
-        """Returns the accuracy scores."""
-        # Convert the inputs to strings
-        predictions = [str(p) for p in predictions]
-        references = [str(r) for r in references]
-        # Calculate accuracy
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(
-            predictions
-        )
-        print(f"Accuracy: {accuracy}")
-        # Calculate hierarchical precision, recall and f-measure
-        hierarchy = self.isco_hierarchy
-        hP, hR = ham.calculate_hierarchical_precision_recall(
-            references, predictions, hierarchy
-        )
-        hF = ham.hierarchical_f_measure(hP, hR)
-        print(
-            f"Hierarchical Precision: {hP}, Hierarchical Recall: {hR}, Hierarchical F-measure: {hF}"
-        )
-        return {
-            "accuracy": accuracy,
-            "hierarchical_precision": hP,
-            "hierarchical_recall": hR,
-            "hierarchical_fmeasure": hF,
-        }