Spaces:

libraxiong
/

oppo_refuse_match

Runtime error

App Files Files Community

libraxiong committed on May 8, 2024

Commit

f4c0c85

1 Parent(s): 29f7723

add oppo_refuse_match.py

Browse files

Files changed (4) hide show

app.py +6 -0
eval.py +40 -0
oppo_refuse_match.py +67 -0
requirements.txt +1 -0

app.py ADDED Viewed

	@@ -0,0 +1,6 @@

+import evaluate
+from evaluate.utils import launch_gradio_widget
+# Load the metric registered under the id "libraxiong/oppo_refuse_match".
+module = evaluate.load("libraxiong/oppo_refuse_match")
+# Hand the loaded module to evaluate's Gradio helper, which serves the demo UI.
+launch_gradio_widget(module)

eval.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import regex
+import unicodedata
+class SimpleTokenizer(object):
+    ALPHA_NUM = r'[\p{L}\p{N}\p{M}]+'
+    NON_WS = r'[^\p{Z}\p{C}]'
+    def __init__(self):
+        """
+        Args:
+            annotators: None or empty set (only tokenizes).
+        """
+        self._regexp = regex.compile(
+            '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
+            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
+        )
+    def tokenize(self, text, uncased=False):
+        matches = [m for m in self._regexp.finditer(text)]
+        if uncased:
+            tokens = [m.group().lower() for m in matches]
+        else:
+            tokens = [m.group() for m in matches]
+        return tokens
+def _normalize(text):
+    return unicodedata.normalize('NFD', text)
+def has_answer(text, answers, tokenizer=SimpleTokenizer()) -> bool:
+    """Check if a document contains an answer string."""
+    text = _normalize(text)
+    text = tokenizer.tokenize(text, uncased=True)
+    for answer in answers:
+        answer = _normalize(answer)
+        answer = tokenizer.tokenize(answer, uncased=True)
+        for i in range(0, len(text) - len(answer) + 1):
+            if answer == text[i: i + len(answer)]:
+                return True
+    return False

oppo_refuse_match.py ADDED Viewed

	@@ -0,0 +1,67 @@

+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Oppo Refuse Match metric."""
+import re
+import string
+import datasets
+import numpy as np
+import evaluate
+from eval import has_answer
+_DESCRIPTION = """
+Returns the rate at which the input predicted strings exactly match the refuse list
+"""
+_KWARGS_DESCRIPTION = """
+Args:
+    predictions: List of predicted texts.
+Returns:
+    exact_match: Dictionary containing oppo_refuse_match rate. Possible values are 0 or 1
+Examples:
+"""
+_CITATION = """ the dpr exact match
+"""
+@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class OppoRefuseMatch(evaluate.Metric):
+    def _info(self):
+        return evaluate.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Value("string", id="sequence"),
+                }
+            ),
+            reference_urls=[],
+        )
+    def _compute(
+        self,
+        predictions
+    ):
+        patterns = [
+        r"There is no", r"no", r"non-existent", r"not a", r"none"
+    ]
+        if has_answer(predictions,patterns):
+            return {"oppo_refuse_match": 1}
+        else:
+            return {"oppo_refuse_match": 0}

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ git+https://github.com/huggingface/evaluate@a4bdc10c48a450b978d91389a48dbb5297835c7d