File size: 2,616 Bytes
43b7e92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import json
import logging
import os
from collections import defaultdict
from pathlib import Path

from huggingface_hub import HfApi, ModelFilter

import diffusers


# Repository root: two levels up from this script (assumed to live in utils/).
PATH_TO_REPO = Path(__file__).parent.parent.resolve()
# Pipeline test modules that are always part of the test matrix, regardless
# of how many downloads their pipelines have on the Hub.
ALWAYS_TEST_PIPELINE_MODULES = [
    "controlnet",
    "stable_diffusion",
    "stable_diffusion_2",
    "stable_diffusion_xl",
    "stable_diffusion_adapter",
    "deepfloyd_if",
    "ip_adapters",
    "kandinsky",
    "kandinsky2_2",
    "text_to_video_synthesis",
    "wuerstchen",
]
# Minimum aggregated download count for a pipeline class to be tested;
# overridable through the PIPELINE_USAGE_CUTOFF environment variable.
PIPELINE_USAGE_CUTOFF = int(os.getenv("PIPELINE_USAGE_CUTOFF", 50000))

logger = logging.getLogger(__name__)
api = HfApi()
# NOTE(review): `filter` shadows the `filter` builtin; it is referenced by
# fetch_pipeline_objects() below, so renaming it here alone would break callers.
filter = ModelFilter(library="diffusers")


def filter_pipelines(usage_dict, usage_cutoff=10000):
    """Select pipeline class names that meet the usage threshold.

    Args:
        usage_dict: mapping of pipeline class name -> aggregated download count.
        usage_cutoff: minimum download count required to keep an entry.

    Returns:
        list of names that meet the cutoff AND exist as attributes of
        ``diffusers.pipelines`` (non-pipeline tags are dropped).
    """
    return [
        name
        for name, usage in usage_dict.items()
        if usage >= usage_cutoff and hasattr(diffusers.pipelines, name)
    ]


def fetch_pipeline_objects():
    """Aggregate Hub download counts per diffusers pipeline class.

    Lists all Hub models tagged with the diffusers library, sums downloads
    per ``diffusers:<ClassName>`` tag (models without such a tag are bucketed
    under ``"other"``), and returns the class names whose totals reach
    ``PIPELINE_USAGE_CUTOFF``.
    """
    prefix = "diffusers:"
    downloads = defaultdict(int)

    for model in api.list_models(filter=filter):
        pipeline_tags = [tag for tag in model.tags if tag.startswith(prefix)]
        if pipeline_tags:
            for tag in pipeline_tags:
                downloads[tag[len(prefix) :]] += model.downloads
        else:
            # Model carries no pipeline tag; count it under a catch-all bucket.
            downloads["other"] += model.downloads

    # Drop entries that accumulated zero downloads before filtering.
    nonzero = {name: count for name, count in downloads.items() if count > 0}
    return filter_pipelines(nonzero, PIPELINE_USAGE_CUTOFF)


def fetch_pipeline_modules_to_test():
    """Map the high-usage pipeline classes to their test module names.

    Returns:
        list of test module names (one per pipeline class; may contain
        duplicates — callers deduplicate).

    Raises:
        RuntimeError: if the model list could not be fetched from the Hub.
    """
    try:
        pipeline_objects = fetch_pipeline_objects()
    except Exception as e:
        # logger.exception records the full traceback, not just the message.
        logger.exception(e)
        # Chain the original exception so the root cause is preserved.
        raise RuntimeError("Unable to fetch model list from HuggingFace Hub.") from e

    test_modules = []
    for pipeline_name in pipeline_objects:
        module = getattr(diffusers, pipeline_name)

        # e.g. "diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion"
        # -> second-to-last component is the pipeline's test module name.
        test_module = module.__module__.split(".")[-2].strip()
        test_modules.append(test_module)

    return test_modules


def main():
    """Build the pipeline test-module list and write it to reports/test-pipelines.json.

    The list is the union of usage-derived modules and the always-tested
    modules, deduplicated and sorted so the emitted JSON is deterministic
    across runs (plain ``list(set(...))`` order varies between processes).
    """
    test_modules = fetch_pipeline_modules_to_test()
    test_modules.extend(ALWAYS_TEST_PIPELINE_MODULES)

    # Deduplicate and sort for a stable, reproducible report.
    test_modules = sorted(set(test_modules))
    print(json.dumps(test_modules))

    # PATH_TO_REPO is a Path; use pathlib instead of f-string concatenation.
    save_path = PATH_TO_REPO / "reports"
    save_path.mkdir(parents=True, exist_ok=True)

    with open(save_path / "test-pipelines.json", "w") as f:
        json.dump({"pipeline_test_modules": test_modules}, f)


if __name__ == "__main__":
    main()