alvations committed on
Commit
2dfd7fe
1 Parent(s): 0c9be51
Files changed (3) hide show
  1. app.py +6 -0
  2. requirements.txt +1 -0
  3. unitemup.py +90 -0
app.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import evaluate
2
+ from evaluate.utils import launch_gradio_widget
3
+
4
+
5
# Load the metric registered under the name "unite_mup" and hand it to the
# Gradio widget launcher provided by `evaluate.utils`.
# NOTE(review): the metric script added in this commit is named `unitemup.py`;
# confirm that "unite_mup" resolves to it in the deployment environment.
module = evaluate.load("unite_mup")
launch_gradio_widget(module)
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ unbabel-comet>=2.0.1
unitemup.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ import os
14
+ import pathlib
15
+
16
+ import datasets
17
+ import evaluate
18
+ from huggingface_hub import snapshot_download
19
+
20
+ from comet.models.multitask.unified_metric import UnifiedMetric
21
+
22
+
23
+ _CITATION = """\
24
+ @inproceedings{wan-etal-2022-unite,
25
+ title = "{U}ni{TE}: Unified Translation Evaluation",
26
+ author = "Wan, Yu and
27
+ Liu, Dayiheng and
28
+ Yang, Baosong and
29
+ Zhang, Haibo and
30
+ Chen, Boxing and
31
+ Wong, Derek and
32
+ Chao, Lidia",
33
+ booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
34
+ month = may,
35
+ year = "2022",
36
+ address = "Dublin, Ireland",
37
+ publisher = "Association for Computational Linguistics",
38
+ url = "https://aclanthology.org/2022.acl-long.558",
39
+ doi = "10.18653/v1/2022.acl-long.558",
40
+ pages = "8117--8127",
41
+ }
42
+ """
43
+
44
+
45
+ _DESCRIPTION = """\
46
+ From https://huggingface.co/Unbabel/unite-mup
47
+ """
48
+
49
class UNITEMUP(evaluate.Metric):
    """Metric wrapper around a COMET ``UnifiedMetric`` model.

    The model is loaded from the ``Unbabel/unite-mup`` checkpoint in
    ``_download_and_prepare`` and stored on ``self.model``. Two scoring entry
    points are offered:

    * ``_compute`` scores pairs (prediction plus one ground-truth field).
    * ``compute_triplet`` scores (source, prediction, reference) triplets.
    """

    def _info(self):
        """Return the metric description, citation and input features.

        Both ``predictions`` and ``references`` are declared as plain strings.
        """
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string"),
                    "references": datasets.Value("string"),
                }
            ),
        )

    def _download_and_prepare(self, dl_manager):
        """Load the UniTE-MUP checkpoint into ``self.model``.

        A ``*.ckpt`` file found under ``./models--Unbabel--unite-mup/`` is
        tried first. If none exists, or loading it fails, the repository is
        downloaded with ``snapshot_download`` into the current working
        directory and ``checkpoints/model.ckpt`` from that snapshot is loaded.

        Args:
            dl_manager: Unused; accepted because the caller supplies it.
        """
        import logging

        try:
            # `next(..., None)` avoids using StopIteration as control flow
            # when no checkpoint has been downloaded yet.
            cached_ckpt = next(
                pathlib.Path("./models--Unbabel--unite-mup/").rglob("*.ckpt"),
                None,
            )
            if cached_ckpt is not None:
                self.model = UnifiedMetric.load_from_checkpoint(cached_ckpt)
                return
        except Exception as err:
            # Deliberate best-effort: an unreadable local checkpoint must not
            # be fatal, so fall through to a fresh download. `Exception`
            # (rather than a bare except) lets KeyboardInterrupt/SystemExit
            # propagate.
            logging.getLogger(__name__).warning(
                "Could not load local UniTE-MUP checkpoint, downloading instead: %s",
                err,
            )

        # abspath(dirname('.')) evaluates to the current working directory.
        model_path = snapshot_download(
            repo_id="Unbabel/unite-mup",
            cache_dir=os.path.abspath(os.path.dirname(".")),
        )
        model_checkpoint_path = f"{model_path}/checkpoints/model.ckpt"
        self.model = UnifiedMetric.load_from_checkpoint(model_checkpoint_path)

    def _compute(
        self,
        predictions,
        references,
        data_keys=None,
        batch_size=8,
    ):
        """Score prediction/ground-truth pairs.

        Allows the caller to use either source inputs or reference
        translations as ground truth by choosing the sample keys.

        Args:
            predictions: Iterable of hypothesis strings.
            references: Iterable of ground-truth strings, paired positionally
                with ``predictions``.
            data_keys: Two-item sequence naming the sample fields for the
                prediction and the ground truth, in that order. Defaults to
                ``("mt", "ref")``, the same keys ``compute_triplet`` uses for
                predictions and references.
            batch_size: Batch size forwarded to ``self.model.predict``.

        Returns:
            ``{"scores": ...}`` holding the ``scores`` attribute of the
            prediction output.
        """
        if data_keys is None:
            # Previously a missing `data_keys` crashed with a TypeError on
            # indexing None; fall back to the hypothesis/reference pairing.
            data_keys = ("mt", "ref")
        prediction_key, truth_key = data_keys[0], data_keys[1]
        data = [
            {prediction_key: p, truth_key: r}
            for p, r in zip(predictions, references)
        ]
        return {"scores": self.model.predict(data, batch_size=batch_size).scores}

    def compute_triplet(
        self,
        predictions,
        references,
        sources,
        batch_size=8,
    ):
        """Compute unified scores from sources, hypotheses and references.

        Args:
            predictions: Iterable of hypothesis strings (sample key ``"mt"``).
            references: Iterable of reference strings (sample key ``"ref"``).
            sources: Iterable of source strings (sample key ``"src"``).
            batch_size: Batch size forwarded to ``self.model.predict``.

        The three inputs are combined with ``zip``, so items beyond the
        shortest input are ignored.

        Returns:
            ``{"scores": ...}`` holding ``metadata.unified_scores`` of the
            prediction output.
        """
        data = [
            {"src": s, "mt": p, "ref": r}
            for s, p, r in zip(sources, predictions, references)
        ]
        prediction = self.model.predict(data, batch_size=batch_size)
        return {"scores": prediction.metadata.unified_scores}
90
+