"""Static configuration and display text for the ProgressGym leaderboard.

Defines the benchmark tasks shown on the leaderboard (``Tasks``) and the
markdown/text constants rendered by the Gradio app (title, introduction,
submission instructions, citation, etc.).
"""

from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    # benchmark: task_key in the results JSON file
    # metric: metric_key in the results JSON file
    # col_name: column name to display in the leaderboard
    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """The set of progress-alignment challenges scored on the leaderboard."""

    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    # task0 = Task("anli_r1", "acc", "ANLI")
    # task1 = Task("logiqa", "acc_norm", "LogiQA")
    task0 = Task("Follow", "accuracy", "Follow ↑")
    task1 = Task("Predict", "accuracy", "Predict ↑")
    task2 = Task("Coevolve", "accuracy", "Coevolve ↑")


NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------


# Your leaderboard name
TITLE = """

ProgressGym Leaderboard

"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
Human values are evolving and have undergone huge, continual progress over the past millennium. Values embedded into the LLMs need to undergo the same process, or else we risk *locking-in* current human values by putting humans into an echo chamber of like-minded LLMs. This concern is especially salient when LLMs have become personal assistants, romantic partners, K-12 educators, etc., and [psychological studies](https://arxiv.org/abs/2302.00560) have demonstrated very significant impact of LLMs on human views.

ProgressGym-LeaderBoard is an open leaderboard for *progress alignment* algorithms - algorithms which learn and emulate the mechanics of moral progress, in order to facilitate continual improvements in real-world value decisions. Refer to the [ProgressGym paper](https://arxiv.org/abs/2406.20087) for more details.
"""

# Which evaluations are you running? how can people reproduce what you have?
# NOTE: intentionally empty for now; the needless f-string prefix was removed
# (it had no placeholders, so the value is unchanged).
LLM_BENCHMARKS_TEXT = ""

EVALUATION_QUEUE_TEXT = """
## Steps to submit your progress alignment algorithm

To submit your progress alignment algorithm, please follow the following steps. Note that we are in the process of compiling a full documentation for the submission process, and for the usage of the ProgressGym framework in general. We will update this page with more detailed instructions when we release the full codebase in a few days - please [stay tuned](https://docs.google.com/forms/d/e/1FAIpQLSd0ofaMypr7YJhUbdjNgaEQ3BV2aam5joCM7pLj1IRvprLrYA/viewform)!

#### Step 1: Clone the ProgressGym codebase

[More explanation coming]

#### Step 2: Implement your progress alignment algorithm as an `Examinee` class

[More explanation coming]

#### Step 3: Run the benchmark script on all challenges

[More explanation coming]

#### Step 4: Submit the generated results as a JSON file

[More explanation coming]
"""

ABOUT_TEXT = """ProgressGym-LeaderBoard is tightly coupled with the [ProgressGym](https://huggingface.co/collections/PKU-Alignment/progressgym-666735fcf3e4efa276226eaa) experimental framework for progress alignment research, which provides historical datasets, historical LLMs, simulation environments, algorithm implementations, and benchmarks on progress alignment challenges."""

CITATION_BUTTON_LABEL = "Please copy the following bibtex entry to cite the ProgressGym project"

# Raw string so BibTeX braces/backslashes are preserved verbatim.
CITATION_BUTTON_TEXT = r"""@article{progressgym,
  title={ProgressGym: Alignment with a Millennium of Moral Progress},
  author={Tianyi Qiu and Yang Zhang and Xuchuan Huang and Jasmine Xinze Li and Jiaming Ji and Yaodong Yang},
  journal={arXiv preprint arXiv:2406.20087},
  eprint={2406.20087},
  eprinttype = {arXiv},
  year={2024}
}
"""

SUBMIT_CHALLENGE_TEXT = """
## Steps to submit your progress alignment challenge

To submit your progress alignment challenge, please follow the following steps. Note that we are in the process of compiling a full documentation for the submission process, and for the usage of the ProgressGym framework in general. We will update this page with more detailed instructions when we release the full codebase in a few days - please [stay tuned](https://docs.google.com/forms/d/e/1FAIpQLSd0ofaMypr7YJhUbdjNgaEQ3BV2aam5joCM7pLj1IRvprLrYA/viewform)!

#### Step 1: Clone the ProgressGym codebase

#### Step 2: Implement your progress alignment challenge as a `Judge` class

#### Step 3 (optional but recommended): Run the benchmark script on all challenges

#### Step 4: Submit the link to your codebase and (optional but recommended) the generated results as a JSON file
"""