yangwang825 committed on
Commit
152b5cf
1 Parent(s): 061400f

Update datastats.py

Browse files
Files changed (1) hide show
  1. datastats.py +10 -15
datastats.py CHANGED
@@ -46,14 +46,15 @@ _CITATION = """\
46
  """
47
 
48
  _DESCRIPTION = """\
49
- DataStats examines summarization strategies using three measures that capture the degree of text overlap between the summary and article, and the rate of compression of the information conveyed.
50
  """
51
 
52
  _KWARGS_DESCRIPTION = """
53
- BERTScore Metrics with the hashcode from a source against one or more references.
 
54
  Args:
55
- predictions (list of str): Prediction/candidate sentences.
56
- references (list of str or list of list of str): Reference sentences.
57
 
58
  Returns:
59
  coverage: Percentage of words in the summary that are from the source article, measuring the extent to which a summary is a derivative of a text.
@@ -270,14 +271,8 @@ class DataStats(evaluate.Metric):
270
  features=[
271
  datasets.Features(
272
  {
273
- "predictions": datasets.Value("string", id="sequence"),
274
- "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
275
- }
276
- ),
277
- datasets.Features(
278
- {
279
- "predictions": datasets.Value("string", id="sequence"),
280
- "references": datasets.Value("string", id="sequence"),
281
  }
282
  ),
283
  ],
@@ -290,8 +285,8 @@ class DataStats(evaluate.Metric):
290
 
291
  def _compute(
292
  self,
293
- predictions,
294
- references,
295
  n_gram: int = 3,
296
  n_workers: int = 4,
297
  lowercase: bool = False,
@@ -300,7 +295,7 @@ class DataStats(evaluate.Metric):
300
  logger.info(predictions)
301
  logger.info(references)
302
  datastats = DataStatsMetric(n_gram, n_workers, lowercase, tokenize)
303
- results = datastats.evaluate_batch(predictions, references)
304
  coverage = float(results['coverage'])
305
  density = float(results['density'])
306
  compression = float(results['compression'])
 
46
  """
47
 
48
  _DESCRIPTION = """\
49
+ DataStats examines summarisation strategies using three measures that capture the degree of text overlap between the summary and article, and the rate of compression of the information conveyed.
50
  """
51
 
52
  _KWARGS_DESCRIPTION = """
53
+ DataStats metric for text summarisation.
54
+
55
  Args:
56
+ summaries (list of str): model-generated summaries.
57
+ articles (list of str): Original articles.
58
 
59
  Returns:
60
  coverage: Percentage of words in the summary that are from the source article, measuring the extent to which a summary is a derivative of a text.
 
271
  features=[
272
  datasets.Features(
273
  {
274
+ "summaries": datasets.Value("string", id="sequence"),
275
+ "articles": datasets.Value("string", id="sequence"),
 
 
 
 
 
 
276
  }
277
  ),
278
  ],
 
285
 
286
  def _compute(
287
  self,
288
+ summaries,
289
+ articles,
290
  n_gram: int = 3,
291
  n_workers: int = 4,
292
  lowercase: bool = False,
 
295
  logger.info(predictions)
296
  logger.info(references)
297
  datastats = DataStatsMetric(n_gram, n_workers, lowercase, tokenize)
298
+ results = datastats.evaluate_batch(summaries, articles)
299
  coverage = float(results['coverage'])
300
  density = float(results['density'])
301
  compression = float(results['compression'])