Spaces:
Sleeping
Sleeping
yangwang825
committed on
Commit
•
152b5cf
1
Parent(s):
061400f
Update datastats.py
Browse files- datastats.py +10 -15
datastats.py
CHANGED
@@ -46,14 +46,15 @@ _CITATION = """\
|
|
46 |
"""
|
47 |
|
48 |
_DESCRIPTION = """\
|
49 |
-
DataStats examines
|
50 |
"""
|
51 |
|
52 |
_KWARGS_DESCRIPTION = """
|
53 |
-
|
|
|
54 |
Args:
|
55 |
-
|
56 |
-
|
57 |
|
58 |
Returns:
|
59 |
coverage: Percentage of words in the summary that are from the source article, measuring the extent to which a summary is a derivative of a text.
|
@@ -270,14 +271,8 @@ class DataStats(evaluate.Metric):
|
|
270 |
features=[
|
271 |
datasets.Features(
|
272 |
{
|
273 |
-
"
|
274 |
-
"
|
275 |
-
}
|
276 |
-
),
|
277 |
-
datasets.Features(
|
278 |
-
{
|
279 |
-
"predictions": datasets.Value("string", id="sequence"),
|
280 |
-
"references": datasets.Value("string", id="sequence"),
|
281 |
}
|
282 |
),
|
283 |
],
|
@@ -290,8 +285,8 @@ class DataStats(evaluate.Metric):
|
|
290 |
|
291 |
def _compute(
|
292 |
self,
|
293 |
-
|
294 |
-
|
295 |
n_gram: int = 3,
|
296 |
n_workers: int = 4,
|
297 |
lowercase: bool = False,
|
@@ -300,7 +295,7 @@ class DataStats(evaluate.Metric):
|
|
300 |
logger.info(predictions)
|
301 |
logger.info(references)
|
302 |
datastats = DataStatsMetric(n_gram, n_workers, lowercase, tokenize)
|
303 |
-
results = datastats.evaluate_batch(
|
304 |
coverage = float(results['coverage'])
|
305 |
density = float(results['density'])
|
306 |
compression = float(results['compression'])
|
|
|
46 |
"""
|
47 |
|
48 |
_DESCRIPTION = """\
|
49 |
+
DataStats examines summarisation strategies using three measures that capture the degree of text overlap between the summary and article, and the rate of compression of the information conveyed.
|
50 |
"""
|
51 |
|
52 |
_KWARGS_DESCRIPTION = """
|
53 |
+
DataStats metric for text summarisation.
|
54 |
+
|
55 |
Args:
|
56 |
+
summaries (list of str): Model-generated summaries.
|
57 |
+
articles (list of str or list of list of str): Original articles.
|
58 |
|
59 |
Returns:
|
60 |
coverage: Percentage of words in the summary that are from the source article, measuring the extent to which a summary is a derivative of a text.
|
|
|
271 |
features=[
|
272 |
datasets.Features(
|
273 |
{
|
274 |
+
"summaries": datasets.Value("string", id="sequence"),
|
275 |
+
"articles": datasets.Value("string", id="sequence"),
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
}
|
277 |
),
|
278 |
],
|
|
|
285 |
|
286 |
def _compute(
|
287 |
self,
|
288 |
+
summaries,
|
289 |
+
articles,
|
290 |
n_gram: int = 3,
|
291 |
n_workers: int = 4,
|
292 |
lowercase: bool = False,
|
|
|
295 |
logger.info(predictions)
|
296 |
logger.info(references)
|
297 |
datastats = DataStatsMetric(n_gram, n_workers, lowercase, tokenize)
|
298 |
+
results = datastats.evaluate_batch(summaries, articles)
|
299 |
coverage = float(results['coverage'])
|
300 |
density = float(results['density'])
|
301 |
compression = float(results['compression'])
|