Spaces:

yangwang825
/

datastats

Sleeping

App Files Files Community

yangwang825 committed on Jun 6

Commit

152b5cf

•

1 Parent(s): 061400f

Update datastats.py

Browse files

Files changed (1) hide show

datastats.py +10 -15

datastats.py CHANGED Viewed

@@ -46,14 +46,15 @@ _CITATION = """\
 """
 _DESCRIPTION = """\
-DataStats examines summarization strategies using three measures that capture the degree of text overlap between the summary and article, and the rate of compression of the information conveyed.
 """
 _KWARGS_DESCRIPTION = """
-BERTScore Metrics with the hashcode from a source against one or more references.
 Args:
-    predictions (list of str): Prediction/candidate sentences.
-    references (list of str or list of list of str): Reference sentences.
 Returns:
     coverage: Percentage of words in the summary that are from the source article, measuring the extent to which a summary is a derivative of a text.
@@ -270,14 +271,8 @@ class DataStats(evaluate.Metric):
             features=[
                 datasets.Features(
                     {
-                        "predictions": datasets.Value("string", id="sequence"),
-                        "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
-                    }
-                ),
-                datasets.Features(
-                    {
-                        "predictions": datasets.Value("string", id="sequence"),
-                        "references": datasets.Value("string", id="sequence"),
                     }
                 ),
             ],
@@ -290,8 +285,8 @@ class DataStats(evaluate.Metric):
     def _compute(
         self,
-        predictions,
-        references,
         n_gram: int = 3,
         n_workers: int = 4,
         lowercase: bool = False,
@@ -300,7 +295,7 @@ class DataStats(evaluate.Metric):
         logger.info(predictions)
         logger.info(references)
         datastats = DataStatsMetric(n_gram, n_workers, lowercase, tokenize)
-        results = datastats.evaluate_batch(predictions, references)
         coverage = float(results['coverage'])
         density = float(results['density'])
         compression = float(results['compression'])

 """
 _DESCRIPTION = """\
+DataStats examines summarisation strategies using three measures that capture the degree of text overlap between the summary and article, and the rate of compression of the information conveyed.
 """
 _KWARGS_DESCRIPTION = """
+DataStats metric for text summarisation.
 Args:
+    summaries (list of str): Model-generated summaries.
+    articles (list of str): Original articles, one per summary.
 Returns:
     coverage: Percentage of words in the summary that are from the source article, measuring the extent to which a summary is a derivative of a text.
             features=[
                 datasets.Features(
                     {
+                        "summaries": datasets.Value("string", id="sequence"),
+                        "articles": datasets.Value("string", id="sequence"),
                     }
                 ),
             ],
     def _compute(
         self,
+        summaries,
+        articles,
         n_gram: int = 3,
         n_workers: int = 4,
         lowercase: bool = False,
         logger.info(predictions)
         logger.info(references)
         datastats = DataStatsMetric(n_gram, n_workers, lowercase, tokenize)
+        results = datastats.evaluate_batch(summaries, articles)
         coverage = float(results['coverage'])
         density = float(results['density'])
         compression = float(results['compression'])