Spaces:
Runtime error
Runtime error
import os | |
from typing import Dict, Any, Optional, List | |
import re | |
from abc import ABC, abstractmethod | |
from huggingface_hub import (ModelCard, comment_discussion, | |
create_discussion, get_discussion_details, | |
get_repo_discussions) | |
import markdown | |
from bs4 import BeautifulSoup | |
from tabulate import tabulate | |
from difflib import SequenceMatcher | |
KEY = os.environ.get("KEY") | |
def similar(a, b): | |
"""Check similarity of two sequences""" | |
return SequenceMatcher(None, a, b).ratio() | |
class ComplianceCheck(ABC): | |
def __init__(self, name): | |
self.name = name | |
def check(self, card: BeautifulSoup) -> bool: | |
raise NotImplementedError | |
class ModelProviderIdentityCheck(ComplianceCheck): | |
def __init__(self): | |
super().__init__("Identity and Contact Details") | |
def check(self, card: BeautifulSoup): | |
developed_by_li = card.findAll(text=re.compile("Developed by"))[0].parent.parent | |
developed_by = list(developed_by_li.children)[1].text.strip() | |
if developed_by == "[More Information Needed]": | |
return False | |
else: | |
return True | |
class IntendedPurposeCheck(ComplianceCheck): | |
def __init__(self): | |
super().__init__("Intended Purpose") | |
def check(self, card: BeautifulSoup): | |
# direct_use = card.find_all("h2", text="Direct Use")[0] | |
# | |
# if developed_by == "[More Information Needed]": | |
# return False | |
# else: | |
return False | |
compliance_checks = [ | |
ModelProviderIdentityCheck(), | |
IntendedPurposeCheck() | |
# "General Limitations", | |
# "Computational and Hardware Requirements", | |
# "Carbon Emissions" | |
] | |
def parse_webhook_post(data: Dict[str, Any]) -> Optional[str]: | |
event = data["event"] | |
if event["scope"] != "repo": | |
return None | |
repo = data["repo"] | |
repo_name = repo["name"] | |
repo_type = repo["type"] | |
if repo_type != "model": | |
raise ValueError("Incorrect repo type.") | |
return repo_name | |
def check_compliance(comp_checks: List[ComplianceCheck], card: BeautifulSoup) -> Dict[str, bool]: | |
return {c.name: c.check(card) for c in comp_checks} | |
def run_compliance_check(repo_name): | |
card_data: ModelCard = ModelCard.load(repo_id_or_path=repo_name) | |
card_html = markdown.markdown(card_data.content) | |
card_soup = BeautifulSoup(card_html, features="html.parser") | |
compliance_results = check_compliance(compliance_checks, card_soup) | |
return compliance_results | |
def create_metadata_breakdown_table(compliance_check_dictionary): | |
data = {k: v for k, v in compliance_check_dictionary.items()} | |
metadata_fields_column = list(data.keys()) | |
metadata_values_column = list(data.values()) | |
table_data = list(zip(metadata_fields_column, metadata_values_column)) | |
return tabulate( | |
table_data, tablefmt="github", headers=("Compliance Check", "Present") | |
) | |
def create_markdown_report( | |
desired_metadata_dictionary, repo_name, update: bool = False | |
): | |
report = f"""# Model Card Regulatory Compliance report card {"(updated)" if update else ""} | |
\n | |
This is an automatically produced model card regulatory compliance report card for {repo_name}. | |
This report is meant as a POC! | |
\n | |
## Breakdown of metadata fields for your model | |
\n | |
{create_metadata_breakdown_table(desired_metadata_dictionary)} | |
\n | |
""" | |
return report | |
def create_or_update_report(compliance_check, repo_name): | |
report = create_markdown_report( | |
compliance_check, repo_name, update=False | |
) | |
repo_discussions = get_repo_discussions( | |
repo_name, | |
repo_type="model", | |
) | |
for discussion in repo_discussions: | |
if ( | |
discussion.title == "Metadata Report Card" and discussion.status == "open" | |
): # An existing open report card thread | |
discussion_details = get_discussion_details( | |
repo_name, discussion.num, repo_type="model" | |
) | |
last_comment = discussion_details.events[-1].content | |
if similar(report, last_comment) <= 0.999: | |
report = create_markdown_report( | |
compliance_check, | |
repo_name, | |
update=True, | |
) | |
comment_discussion( | |
repo_name, | |
discussion.num, | |
comment=report, | |
repo_type="model", | |
) | |
return True | |
create_discussion( | |
repo_name, | |
"Model Card Regulatory Compliance Report Card", | |
description=report, | |
repo_type="model", | |
) | |
return True | |