Spaces:
Runtime error
Runtime error
NimaBoscarino
committed on
Commit
·
11bd448
1
Parent(s):
7a3d7a6
WIP: Compliance Check pipeline w/ gradio app
Browse files
- app.py +27 -0
- compliance_checks.py +59 -0
- main.py +1 -77
- requirements.txt +7 -5
- tests/test_compliance_checks.py +200 -0
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from huggingface_hub import ModelCard
|
3 |
+
from compliance_checks import (
|
4 |
+
ComplianceSuite,
|
5 |
+
ModelProviderIdentityCheck,
|
6 |
+
IntendedPurposeCheck
|
7 |
+
)
|
8 |
+
|
9 |
+
def run_compliance_check(repo_name):
    """Run the compliance suite against a Hub model card and return the results as text.

    Args:
        repo_name: Repo id (or local path) handed to ``ModelCard.load``.

    Returns:
        ``str()`` of the list produced by ``ComplianceSuite.run``.
    """
    card_text = ModelCard.load(repo_id_or_path=repo_name).content

    checks = [
        ModelProviderIdentityCheck(),
        IntendedPurposeCheck(),
    ]
    outcome = ComplianceSuite(checks=checks).run(card_text)

    return str(outcome)
|
20 |
+
|
21 |
+
|
22 |
+
# Text-in / text-out Gradio UI around run_compliance_check; launched on import,
# exactly as before.
demo = gr.Interface(
    fn=run_compliance_check,
    inputs="text",
    outputs="text",
    examples=[["society-ethics/model-card-webhook-test"]],
)
demo.launch()
|
compliance_checks.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC, abstractmethod
|
2 |
+
|
3 |
+
import markdown
|
4 |
+
from bs4 import BeautifulSoup, Comment
|
5 |
+
|
6 |
+
|
7 |
+
class ComplianceCheck(ABC):
    """Interface for a single compliance check run against a parsed model card."""

    @abstractmethod
    def run_check(self, card: "BeautifulSoup") -> "tuple[bool, str | None]":
        """Evaluate the check on ``card``.

        Args:
            card: The model card, rendered to HTML and parsed with BeautifulSoup.

        Returns:
            A ``(passed, value)`` pair: whether the check passed, plus any value
            extracted from the card (``None`` when there is nothing to report).
            This matches what the concrete checks in this module return and
            what the tests unpack.

        Raises:
            NotImplementedError: Always, when called on the base implementation.
        """
        raise NotImplementedError
|
11 |
+
|
12 |
+
|
13 |
+
class ModelProviderIdentityCheck(ComplianceCheck):
    """Check that the "Model Description" section names who developed the model."""

    def run_check(self, card: BeautifulSoup):
        """Look up the "Developed by:" entry under the "Model Description" heading.

        Args:
            card: The model card, rendered to HTML and parsed with BeautifulSoup.

        Returns:
            ``(True, developer)`` when a developer is given, otherwise
            ``(False, None)``. This covers a missing heading, list or label,
            an empty value, and the ``[More Information Needed]`` placeholder.
        """
        try:
            model_description = card.find("h3", string="Model Description")
            # find_next_siblings() only returns tags, so whitespace and HTML
            # comments between the heading and the list are skipped. It returns
            # an empty list (IndexError below) when the heading is the last element.
            description_list = model_description.find_next_siblings()[0]
            label = description_list.find(string="Developed by:").parent
            # Join everything after the bold label so values containing markup
            # (e.g. a link) are read in full rather than only the first text node.
            developer = "".join(
                sibling.text
                for sibling in label.next_siblings
                if not isinstance(sibling, Comment)
            ).strip()
        except (AttributeError, IndexError):
            return False, None

        if not developer or developer == "[More Information Needed]":
            return False, None

        return True, developer
|
26 |
+
|
27 |
+
|
28 |
+
class IntendedPurposeCheck(ComplianceCheck):
    """Check that the "Direct Use" section has been filled in."""

    # A "### Direct Use" section ends at the next heading of the same or a
    # higher level; deeper headings (h4+) still belong to it.
    _SECTION_END_TAGS = ("h1", "h2", "h3")

    def run_check(self, card: BeautifulSoup):
        """Collect the text under the "Direct Use" heading and check it is not the placeholder.

        Args:
            card: The model card, rendered to HTML and parsed with BeautifulSoup.

        Returns:
            ``(False, None)`` when the heading is missing or its content is just
            ``[More Information Needed]``; ``(True, None)`` otherwise.
        """
        direct_use = card.find("h3", string="Direct Use")
        if direct_use is None:
            return False, None

        parts = []
        # Iterating next_siblings simply ends when the document does, so a
        # "Direct Use" section at the very end of the card no longer raises
        # StopIteration.
        for sibling in direct_use.next_siblings:
            if getattr(sibling, "name", None) in self._SECTION_END_TAGS:
                break
            # HTML comments are template guidance, not content.
            if not isinstance(sibling, Comment):
                parts.append(sibling.text)

        if "".join(parts).strip() == "[More Information Needed]":
            return False, None

        return True, None
|
49 |
+
|
50 |
+
|
51 |
+
class ComplianceSuite:
    """Runs a collection of compliance checks against one model card."""

    def __init__(self, checks):
        """Store the checks to run; each must expose ``run_check(card_soup)``."""
        self.checks = checks

    def run(self, model_card):
        """Render the markdown card to HTML, parse it, and run every check.

        Args:
            model_card: The model card as a markdown string.

        Returns:
            A list with the result of each check's ``run_check``, in the order
            of ``self.checks``.
        """
        soup = BeautifulSoup(markdown.markdown(model_card), features="html.parser")

        outcomes = []
        for check in self.checks:
            outcomes.append(check.run_check(soup))
        return outcomes
|
main.py
CHANGED
@@ -1,13 +1,8 @@
|
|
1 |
import os
|
2 |
-
from typing import Dict, Any, Optional, List
|
3 |
-
import re
|
4 |
-
from abc import ABC, abstractmethod
|
5 |
|
6 |
-
from huggingface_hub import (
|
7 |
create_discussion, get_discussion_details,
|
8 |
get_repo_discussions)
|
9 |
-
import markdown
|
10 |
-
from bs4 import BeautifulSoup
|
11 |
from tabulate import tabulate
|
12 |
from difflib import SequenceMatcher
|
13 |
|
@@ -19,77 +14,6 @@ def similar(a, b):
|
|
19 |
return SequenceMatcher(None, a, b).ratio()
|
20 |
|
21 |
|
22 |
-
class ComplianceCheck(ABC):
    """Base class for a named compliance check."""

    def __init__(self, name):
        """Remember the display name used as the key in the results dictionary."""
        self.name = name

    @abstractmethod
    def check(self, card: BeautifulSoup) -> bool:
        """Return whether ``card`` passes this check; subclasses must override."""
        raise NotImplementedError
|
29 |
-
|
30 |
-
|
31 |
-
class ModelProviderIdentityCheck(ComplianceCheck):
    """Passes when the "Developed by" entry is not the template placeholder."""

    def __init__(self):
        super().__init__("Identity and Contact Details")

    def check(self, card: BeautifulSoup):
        """Read the second child of the element two levels above the first "Developed by" match."""
        first_match = card.findAll(text=re.compile("Developed by"))[0]
        list_item = first_match.parent.parent
        developer = list(list_item.children)[1].text.strip()

        return developer != "[More Information Needed]"
|
43 |
-
|
44 |
-
|
45 |
-
class IntendedPurposeCheck(ComplianceCheck):
    """Stub for the "Intended Purpose" check; currently always fails."""

    def __init__(self):
        super().__init__("Intended Purpose")

    def check(self, card: BeautifulSoup):
        """Return ``False`` unconditionally (the check is not implemented yet)."""
        # Sketch left by the author: find the "Direct Use" heading and fail
        # only when its content is "[More Information Needed]".
        return False
|
57 |
-
|
58 |
-
|
59 |
-
# Checks executed by run_compliance_check, in order.
compliance_checks = [
    ModelProviderIdentityCheck(),
    IntendedPurposeCheck(),
    # Listed but not implemented:
    # "General Limitations",
    # "Computational and Hardware Requirements",
    # "Carbon Emissions"
]
|
66 |
-
|
67 |
-
|
68 |
-
def parse_webhook_post(data: Dict[str, Any]) -> Optional[str]:
    """Extract the repo name from a webhook payload.

    Args:
        data: Payload with an ``"event"`` mapping (key ``"scope"``) and, for
            repo-scoped events, a ``"repo"`` mapping (keys ``"name"``, ``"type"``).

    Returns:
        The repo name, or ``None`` when the event scope is not ``"repo"``.

    Raises:
        ValueError: If the repo type is not ``"model"``.
    """
    if data["event"]["scope"] != "repo":
        return None

    repo_info = data["repo"]
    name, kind = repo_info["name"], repo_info["type"]
    if kind == "model":
        return name
    raise ValueError("Incorrect repo type.")
|
78 |
-
|
79 |
-
|
80 |
-
def check_compliance(comp_checks: List[ComplianceCheck], card: BeautifulSoup) -> Dict[str, bool]:
    """Run each check on ``card`` and map the check's name to its result."""
    outcome = {}
    for current in comp_checks:
        outcome[current.name] = current.check(card)
    return outcome
|
82 |
-
|
83 |
-
|
84 |
-
def run_compliance_check(repo_name):
    """Load a repo's model card, render it to HTML, and run the module-level checks on it."""
    card = ModelCard.load(repo_id_or_path=repo_name)
    soup = BeautifulSoup(markdown.markdown(card.content), features="html.parser")
    return check_compliance(compliance_checks, soup)
|
91 |
-
|
92 |
-
|
93 |
def create_metadata_breakdown_table(compliance_check_dictionary):
|
94 |
data = {k: v for k, v in compliance_check_dictionary.items()}
|
95 |
metadata_fields_column = list(data.keys())
|
|
|
1 |
import os
|
|
|
|
|
|
|
2 |
|
3 |
+
from huggingface_hub import (comment_discussion,
|
4 |
create_discussion, get_discussion_details,
|
5 |
get_repo_discussions)
|
|
|
|
|
6 |
from tabulate import tabulate
|
7 |
from difflib import SequenceMatcher
|
8 |
|
|
|
14 |
return SequenceMatcher(None, a, b).ratio()
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def create_metadata_breakdown_table(compliance_check_dictionary):
|
18 |
data = {k: v for k, v in compliance_check_dictionary.items()}
|
19 |
metadata_fields_column = list(data.keys())
|
requirements.txt
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
-
fastapi
|
2 |
-
uvicorn
|
3 |
-
markdown
|
4 |
-
beautifulsoup4
|
5 |
-
tabulate
|
|
|
|
|
|
1 |
+
# fastapi
# uvicorn
# markdown and beautifulsoup4 are imported by compliance_checks.py, which app.py
# loads at startup, so they must be installed for the app to run.
markdown
beautifulsoup4
# tabulate
# pytest
gradio
|
tests/test_compliance_checks.py
ADDED
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
from unittest.mock import MagicMock
|
3 |
+
|
4 |
+
import markdown
|
5 |
+
from bs4 import BeautifulSoup, Comment
|
6 |
+
from compliance_checks import ComplianceSuite, ModelProviderIdentityCheck, IntendedPurposeCheck
|
7 |
+
|
8 |
+
|
9 |
+
class TestComplianceCheck:
    """Unit tests for the individual compliance checks, driven by markdown fixtures."""

    @pytest.fixture
    def provider_identity_model_card(self):
        """Card whose "Developed by" entry names a developer."""
        return """
# Model Card for Sample Model

Some random info...

## Model Details

### Model Description

<!-- Provide a longer summary of what this model is. -->

- **Developed by:** Nima Boscarino
- **Model type:** Yada yada yada
"""

    @pytest.fixture
    def bad_provider_identity_model_card(self):
        """Card whose "Developed by" entry is still the template placeholder."""
        return """
# Model Card for Sample Model

Some random info...

## Model Details

### Model Description

- **Developed by:** [More Information Needed]
- **Model type:** Yada yada yada
"""

    @pytest.fixture
    def intended_purpose_model_card(self):
        """Card with real content under "Direct Use"."""
        return """
# Model Card for Sample Model

Some random info...

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

Here is some info about direct uses...

### Downstream Use [optional]

<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

[More Information Needed]

### Out-of-Scope Use

<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

[More Information Needed]

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]
"""

    @pytest.fixture
    def bad_intended_purpose_model_card(self):
        """Card whose "Direct Use" section is still the template placeholder."""
        return """
# Model Card for Sample Model

Some random info...

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

[More Information Needed]

### Downstream Use [optional]

<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

[More Information Needed]

### Out-of-Scope Use

<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

[More Information Needed]

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]
"""

    @pytest.mark.parametrize("check, card,check_passed,values", [
        (ModelProviderIdentityCheck(), "provider_identity_model_card", True, "Nima Boscarino"),
        (ModelProviderIdentityCheck(), "bad_provider_identity_model_card", False, None),
        (IntendedPurposeCheck(), "intended_purpose_model_card", True, None),
        (IntendedPurposeCheck(), "bad_intended_purpose_model_card", False, None),
    ])
    def test_run_model_provider_identity_check(self, check, card, check_passed, values, request):
        """Each check returns the expected (passed, value) pair for its fixture card."""
        markdown_text = request.getfixturevalue(card)

        soup = BeautifulSoup(markdown.markdown(markdown_text), features="html.parser")

        passed, extracted = check.run_check(soup)

        assert passed == check_passed
        assert extracted == values
|
130 |
+
|
131 |
+
|
132 |
+
class TestComplianceSuite:
    """Tests for ComplianceSuite construction and execution using mocked checks."""

    @pytest.fixture
    def mock_compliance_check(self):
        """A stand-in check whose ``run_check`` always returns ``True``."""
        check_double = MagicMock()
        check_double.run_check = MagicMock(return_value=True)

        return check_double

    @pytest.fixture
    def empty_compliance_suite(self):
        """A suite with no checks."""
        return ComplianceSuite(checks=[])

    @pytest.fixture
    def compliance_suite(self, mock_compliance_check):
        """A suite holding the single mocked check."""
        return ComplianceSuite(checks=[mock_compliance_check])

    @pytest.fixture
    def empty_compliance_results(self):
        """Expected results for the empty suite."""
        return []

    @pytest.fixture
    def compliance_results(self):
        """Expected results for the one-check suite."""
        return [True]

    def test_create_empty_compliance_suite(self, empty_compliance_suite):
        assert len(empty_compliance_suite.checks) == 0

    def test_create_compliance_suite(self, compliance_suite):
        assert len(compliance_suite.checks) == 1

    @pytest.mark.parametrize("suite,results", [
        ("empty_compliance_suite", "empty_compliance_results"),
        ("compliance_suite", "compliance_results"),
    ])
    def test_run_compliance_suite(self, suite, results, request):
        """Running a suite returns one result per check and calls each check exactly once."""
        suite_under_test: ComplianceSuite = request.getfixturevalue(suite)
        expected: list = request.getfixturevalue(results)
        assert suite_under_test.run("") == expected

        for registered in suite_under_test.checks:
            registered.run_check.assert_called_once()
|
177 |
+
|
178 |
+
|
179 |
+
class TestEndToEnd:
    """Runs the real checks through ComplianceSuite on a complete markdown card."""

    @pytest.mark.parametrize("card,expected", [
        (
            """
# Model Card for Sample Model

Some random info...

## Model Details

### Model Description

- **Developed by:** Nima Boscarino
- **Model type:** Yada yada yada
""",
            # The card names a developer but has no "Direct Use" section.
            [(True, "Nima Boscarino"), (False, None)],
        ),
    ])
    def test_end_to_end_compliance_suite(self, card, expected):
        """The suite returns each check's (passed, value) pair, in check order."""
        suite = ComplianceSuite(checks=[
            ModelProviderIdentityCheck(),
            IntendedPurposeCheck(),
        ])

        # Previously the result was discarded, so this test could only fail on
        # an exception; assert on the actual outcome instead.
        assert suite.run(card) == expected
|