API refactor and benchmark enable
* Add flask dependency
* Move common operations to utils for reuse in api
* Remove unused logging
* Remove refactored code
* Add prototype API
* Update utils.py
Co-authored-by: alan-aboudib-mc <alan.aboudib@macrocosmos.ai>
* Update utils.py
Co-authored-by: alan-aboudib-mc <alan.aboudib@macrocosmos.ai>
* Update utils.py
Co-authored-by: alan-aboudib-mc <alan.aboudib@macrocosmos.ai>
* Updated wandb benchmark project
---------
Co-authored-by: alan-aboudib-mc <alan.aboudib@macrocosmos.ai>
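
For reference, the prototype API added in this commit can be exercised end to end once api.py is running. Below is a minimal client sketch using only the Python standard library; it assumes the server is on the default localhost port 5000 from api.py's __main__ block:

import json
import urllib.request

BASE = "http://127.0.0.1:5000"

def get_json(path):
    # Every endpoint in the prototype API is a plain GET that returns JSON.
    with urllib.request.urlopen(BASE + path) as resp:
        return json.loads(resp.read().decode())

leaderboard = get_json("/leaderboard?show_stale=1")  # rows from utils.leaderboard_data
losses = get_json("/loss")                           # best average loss over time
print(leaderboard[:3])
print(losses[-1] if losses else "no loss data")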
api.py ADDED
@@ -0,0 +1,112 @@
+import utils
+import time
+import datetime
+
+import pandas as pd
+import bittensor as bt
+from typing import Dict, List, Any, Optional, Tuple
+from flask import Flask, request, jsonify
+
+
+app = Flask(__name__)
+
+# Global variables (saves time on loading data)
+
+state_vars = utils.test_load_state_vars()  # lightweight fixture loader; /reload swaps in the full load_state_vars()
+metagraph = state_vars["metagraph"]
+model_data = state_vars["model_data"]
+vali_runs = state_vars["vali_runs"]
+scores = state_vars["scores"]
+validator_df = state_vars["validator_df"]
+benchmarks = state_vars.get("benchmarks", None)
+benchmark_timestamp = state_vars.get("benchmark_timestamp", None)
+
+
+@app.route('/', methods=['GET'])
+def home():
+    return "Welcome to the Bittensor Pretraining Leaderboard API!"
+
+
+@app.route('/reload', methods=['GET'])
+def reload():
+    """
+    Reload the state variables
+    """
+    global metagraph, model_data, vali_runs, scores, validator_df, benchmarks, benchmark_timestamp
+    state_vars = utils.load_state_vars()
+    metagraph = state_vars["metagraph"]
+    model_data = state_vars["model_data"]
+    vali_runs = state_vars["vali_runs"]
+    scores = state_vars["scores"]
+    validator_df = state_vars["validator_df"]
+    benchmarks = state_vars.get("benchmarks", None)
+    benchmark_timestamp = state_vars.get("benchmark_timestamp", None)
+
+    return jsonify({"message": "State variables reloaded"})
+
+
+@app.route('/benchmark', methods=['GET'])
+def benchmark():
+    """
+    Get the benchmarks and the timestamp
+
+    Returns:
+    - benchmarks: List of dicts (from pandas DataFrame)
+    - benchmark_timestamp: String
+    """
+    return jsonify(
+        {
+            "benchmarks": benchmarks.to_dict(orient='records'),
+            "benchmark_timestamp": benchmark_timestamp.strftime('%Y-%m-%d %H:%M:%S')
+        }
+    )
+
+
+@app.route('/metagraph', methods=['GET'])
+def metagraph_route():  # named so the view does not shadow the global metagraph
+    """
+    Get the metagraph data
+    Returns:
+    - metagraph_data: List of dicts (from pandas DataFrame)
+    """
+    return jsonify(
+        utils.make_metagraph_dataframe(metagraph).to_dict(orient='records')
+    )
+
+
+@app.route('/leaderboard', methods=['GET'])
+def leaderboard():
+    """
+    Get the leaderboard data
+    Returns:
+    - leaderboard_data: List of dicts (from pandas DataFrame)
+    """
+    show_stale = request.args.get('show_stale')
+    return jsonify(
+        utils.leaderboard_data(model_data, scores, show_stale=show_stale)
+    )
+
+
+@app.route('/loss', methods=['GET'])
+def loss():
+    """
+    Get the losses over time
+    Returns:
+    - losses_over_time: List of dicts (from pandas DataFrame)
+    """
+    return jsonify(
+        utils.get_losses_over_time(vali_runs).to_dict(orient='records')
+    )
+
+
+@app.route('/validator', methods=['GET'])
+def validator():
+    """
+    Get the validator data
+    Returns:
+    - validator_data: List of dicts (from pandas DataFrame)
+    """
+    return jsonify(
+        utils.make_validator_dataframe(validator_df, model_data).to_dict(orient='records')
+    )
+
+
+if __name__ == '__main__':
+    app.run(port=5000, debug=True)
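
A caveat on the /leaderboard route above: request.args.get('show_stale') yields a string or None, never a bool, so any non-empty value (even "False") counts as truthy when utils.leaderboard_data evaluates show_stale. If strict parsing matters to callers, a hypothetical helper (not part of this commit) could normalize it:

def parse_bool(value, default=False):
    # Hypothetical strict parsing of the show_stale flag (not in the commit).
    # Accept common truthy spellings; everything else is False.
    if value is None:
        return default
    return value.strip().lower() in ("1", "true", "yes")

# inside the route: show_stale = parse_bool(request.args.get('show_stale'))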
app.py CHANGED
@@ -1,29 +1,14 @@
 # Code adapted from: https://huggingface.co/spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
 
-import argparse
-import functools
-import traceback
-import gradio as gr
-import bittensor as bt
-from typing import Dict, List, Any, Optional, Tuple
-from bittensor.extrinsics.serving import get_metadata
-from dataclasses import dataclass
-import wandb
-import math
 import os
 import datetime
-import time
-
-import pandas as pd
-import numpy as np
+import gradio as gr
+
 from dotenv import load_dotenv
 from huggingface_hub import HfApi
 from apscheduler.schedulers.background import BackgroundScheduler
-import pandas as pd
-
-load_dotenv()
-
 
+import utils
 
 FONT = (
     """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
@@ -33,239 +18,15 @@ HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/ma
 
 EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
 EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
-VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
-#BENCHMARK_WANDB_PROJECT = "raofoundation/pretraining-leaderboard-data"
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
-API = HfApi(token=HF_TOKEN)
-WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
-SUBTENSOR_ENDPOINT=os.environ.get("SUBTENSOR_ENDPOINT", None)
-REPO_ID = "macrocosm-os/sn9"
-MAX_AVG_LOSS_POINTS = 1
-RETRIES = 5
-DELAY_SECS = 3
-NETUID = 9
-SECONDS_PER_BLOCK = 12
-
-
-@dataclass
-class ModelData:
-    uid: int
-    hotkey: str
-    namespace: str
-    name: str
-    commit: str
-    hash: str
-    block: int
-    incentive: float
-    emission: float
-
-    @classmethod
-    def from_compressed_str(
-        cls,
-        uid: int,
-        hotkey: str,
-        cs: str,
-        block: int,
-        incentive: float,
-        emission: float,
-    ):
-        """Returns an instance of this class from a compressed string representation"""
-        tokens = cs.split(":")
-        return ModelData(
-            uid=uid,
-            hotkey=hotkey,
-            namespace=tokens[0],
-            name=tokens[1],
-            commit=tokens[2] if tokens[2] != "None" else None,
-            hash=tokens[3] if tokens[3] != "None" else None,
-            block=block,
-            incentive=incentive,
-            emission=emission,
-        )
-
-
-def run_with_retries(func, *args, **kwargs):
-    for i in range(0, RETRIES):
-        try:
-            return func(*args, **kwargs)
-        except (Exception, RuntimeError):
-            if i == RETRIES - 1:
-                raise
-            time.sleep(DELAY_SECS)
-    raise RuntimeError("Should never happen")
-
-
-def get_subtensor_and_metagraph() -> Tuple[bt.subtensor, bt.metagraph]:
-    def _internal() -> Tuple[bt.subtensor, bt.metagraph]:
-        if SUBTENSOR_ENDPOINT:
-            parser = argparse.ArgumentParser()
-            bt.subtensor.add_args(parser)
-            subtensor = bt.subtensor(config=bt.config(parser=parser, args=["--subtensor.chain_endpoint", SUBTENSOR_ENDPOINT]))
-        else:
-            subtensor = bt.subtensor("finney")
-        metagraph = subtensor.metagraph(NETUID, lite=False)
-        return subtensor, metagraph
-
-    return run_with_retries(_internal)
-
-
-def get_validator_weights(
-    metagraph: bt.metagraph,
-) -> Dict[int, Tuple[float, int, Dict[int, float]]]:
-    """Returns a dictionary of validator UIDs to (vtrust, stake, {uid: weight})."""
-    ret = {}
-    for uid in metagraph.uids.tolist():
-        vtrust = metagraph.validator_trust[uid].item()
-        if vtrust > 0:
-            ret[uid] = (vtrust, metagraph.S[uid].item(), {})
-            for ouid in metagraph.uids.tolist():
-                if ouid == uid:
-                    continue
-                weight = round(metagraph.weights[uid][ouid].item(), 4)
-                if weight > 0:
-                    ret[uid][-1][ouid] = weight
-    return ret
-
-
-def get_subnet_data(
-    subtensor: bt.subtensor, metagraph: bt.metagraph
-) -> List[ModelData]:
-    result = []
-    for uid in metagraph.uids.tolist():
-        hotkey = metagraph.hotkeys[uid]
-        metadata = None
-        try:
-            metadata = run_with_retries(
-                functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey)
-            )
-        except:
-            print(f"Failed to get metadata for UID {uid}: {traceback.format_exc()}")
-
-        if not metadata:
-            continue
-
-        commitment = metadata["info"]["fields"][0]
-        hex_data = commitment[list(commitment.keys())[0]][2:]
-        chain_str = bytes.fromhex(hex_data).decode()
-        block = metadata["block"]
-
-        incentive = np.nan_to_num(metagraph.incentive[uid]).item()
-        emission = (
-            np.nan_to_num(metagraph.emission[uid]).item() * 20
-        )  # convert to daily TAO
 
-        model_data = None
-        try:
-            model_data = ModelData.from_compressed_str(
-                uid, hotkey, chain_str, block, incentive, emission
-            )
-        except:
-            continue
-
-        result.append(model_data)
-    return result
-
-
-def is_floatable(x) -> bool:
-    return (
-        isinstance(x, float) and not math.isnan(x) and not math.isinf(x)
-    ) or isinstance(x, int)
-
-
-def get_wandb_runs(project: str, filters: Dict[str, Any]) -> List:
-    """Get the latest runs from Wandb, retrying infinitely until we get them."""
-    while True:
-        api = wandb.Api(api_key=WANDB_TOKEN)
-        runs = list(
-            api.runs(
-                project,
-                filters=filters,
-            )
-        )
-        if len(runs) > 0:
-            return runs
-        # WandDB API is quite unreliable. Wait another minute and try again.
-        print("Failed to get runs from Wandb. Trying again in 60 seconds.")
-        time.sleep(60)
-
-
-def get_scores(
-    uids: List[int],
-    wandb_runs: List,
-) -> Dict[int, Dict[str, Optional[float]]]:
-    result = {}
-    previous_timestamp = None
-    # Iterate through the runs until we've processed all the uids.
-    for i, run in enumerate(wandb_runs):
-        if not "original_format_json" in run.summary:
-            continue
-        data = json.loads(run.summary["original_format_json"])
-        all_uid_data = data["uid_data"]
-        timestamp = data["timestamp"]
-        # Make sure runs are indeed in descending time order.
-        #assert (
-        #    previous_timestamp is None or timestamp < previous_timestamp
-        #), f"Timestamps are not in descending order: {timestamp} >= {previous_timestamp}"
-        previous_timestamp = timestamp
-
-        for uid in uids:
-            if uid in result:
-                continue
-            if str(uid) in all_uid_data:
-                uid_data = all_uid_data[str(uid)]
-                # Only the most recent run is fresh.
-                is_fresh = i == 0
-                result[uid] = {
-                    "avg_loss": uid_data.get("average_loss", None),
-                    "win_rate": uid_data.get("win_rate", None),
-                    "win_total": uid_data.get("win_total", None),
-                    "weight": uid_data.get("weight", None),
-                    "fresh": is_fresh,
-                }
-        if len(result) == len(uids):
-            break
-    return result
-
-
-def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
-    """Returns a dataframe of the best average model loss over time."""
-    timestamps = []
-    best_losses = []
 
-    for run in wandb_runs:
-        if "original_format_json" not in run.summary:
-            continue
-        data = json.loads(run.summary["original_format_json"])
-        all_uid_data = data["uid_data"]
-        timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
-        best_loss = math.inf
-        for _, uid_data in all_uid_data.items():
-            loss = uid_data.get("average_loss", math.inf)
-            # Filter out the numbers from the exploit and when validators lost the best model.
-            if loss < best_loss and (loss > 2.5 or timestamp > datetime.datetime(2024,2,12)) and (loss < 5 or timestamp > datetime.datetime(2024,3,27)):
-                best_loss = uid_data["average_loss"]
-        if best_loss != math.inf:
-            timestamps.append(timestamp)
-            best_losses.append(best_loss)
-
-    return pd.DataFrame({"timestamp": timestamps, "best_loss": best_losses})
-
-
-def format_score(uid: int, scores, key) -> Optional[float]:
-    if uid in scores:
-        if key in scores[uid]:
-            point = scores[uid][key]
-            if is_floatable(point):
-                return round(scores[uid][key], 4)
-    return None
 
-
-def next_epoch(subtensor: bt.subtensor, block: int) -> int:
-    return (
-        block
-        + subtensor.get_subnet_hyperparameters(NETUID).tempo
-        - subtensor.blocks_since_epoch(NETUID, block)
-    )
+HF_REPO_ID = "macrocosm-os/pretraining-leaderboard"
+SECONDS_PER_BLOCK = 12
+
+load_dotenv()
+
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
+API = HfApi(token=HF_TOKEN)
 
 
 def get_next_update_div(current_block: int, next_update_block: int) -> str:
@@ -282,69 +43,21 @@ def get_last_updated_div() -> str:
     return f"""<div>Last Updated: {datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>"""
 
 
-def leaderboard_data(
-    leaderboard: List[ModelData],
-    scores: Dict[int, Dict[str, Optional[float]]],
-    show_stale: bool,
-) -> List[List[Any]]:
-    """Returns the leaderboard data, based on models data and UID scores."""
-    return [
-        [
-            f"[{c.namespace}/{c.name} ({c.commit[0:8]})](https://huggingface.co/{c.namespace}/{c.name}/commit/{c.commit})",
-            format_score(c.uid, scores, "win_rate"),
-            format_score(c.uid, scores, "avg_loss"),
-            format_score(c.uid, scores, "weight"),
-            c.uid,
-            c.block,
-        ]
-        for c in leaderboard
-        if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
-    ]
-
-
-def get_benchmarks() -> Tuple[pd.DataFrame, datetime.datetime]:
-    """Returns the latest benchmarks and the time they were run."""
-    runs = get_wandb_runs(project=BENCHMARK_WANDB_PROJECT, filters=None)
-    for run in runs:
-        artifacts = list(run.logged_artifacts())
-        if artifacts:
-            table = artifacts[-1].get("benchmarks")
-            if table:
-                return table.get_dataframe(), datetime.datetime.strptime(run.metadata["startedAt"], "%Y-%m-%dT%H:%M:%S.%f")
-    bt.logging.error("Failed to get benchmarks from Wandb.")
-    return None, None
-
-
 def restart_space():
-    API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
+    API.restart_space(repo_id=HF_REPO_ID, token=HF_TOKEN)
 
 
 def main():
     # To avoid leaderboard failures, infinitely try until we get all data
     # needed to populate the dashboard
-    while True:
-        try:
-            subtensor, metagraph = get_subtensor_and_metagraph()
-
-            model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
-            model_data.sort(key=lambda x: x.incentive, reverse=True)
-            vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})
-
-            scores = get_scores([x.uid for x in model_data], vali_runs)
-
-            # TODO: Re-enable once ""SubtensorModule.BlocksSinceEpoch" not found" issue is resolved.
-            # current_block = metagraph.block.item()
-            # next_epoch_block = next_epoch(subtensor, current_block)
-
-            validator_df = get_validator_weights(metagraph)
-            weight_keys = set()
-            for uid, stats in validator_df.items():
-                weight_keys.update(stats[-1].keys())
-
-            break
-
-        except Exception as e:
-            print(f"Failed to get data: {e}")
-            time.sleep(30)
+    state_vars = utils.load_state_vars()
+    model_data = state_vars["model_data"]
+    vali_runs = state_vars["vali_runs"]
+    scores = state_vars["scores"]
+    validator_df = state_vars["validator_df"]
+    benchmarks = state_vars.get("benchmarks", None)
+    benchmark_timestamp = state_vars.get("benchmark_timestamp", None)
 
     demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
     with demo:
@@ -363,20 +76,17 @@ def main():
                 },
                 num_top_classes=10,
             )
-
-        '''
         if benchmarks is not None:
             with gr.Accordion("Top Model Benchmarks"):
                 gr.components.Dataframe(benchmarks)
                 gr.HTML("""<div>PPL computed using a stride of 512. See <a href='https://github.com/macrocosm-os/pretraining/blob/dev/scripts/run_benchmarks.py'>here</a> for the full code.</div>""")
                 gr.HTML(f"""<div>Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)</div>""")
-        '''
 
         with gr.Accordion("Evaluation Stats"):
            gr.HTML(EVALUATION_HEADER)
            show_stale = gr.Checkbox(label="Show Stale", interactive=True)
            leaderboard_table = gr.components.Dataframe(
-                value=leaderboard_data(model_data, scores, show_stale.value),
+                value=utils.leaderboard_data(model_data, scores, show_stale.value),
                headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
                datatype=["markdown", "number", "number", "number", "number", "number"],
                elem_id="leaderboard-table",
@@ -385,13 +95,13 @@ def main():
            )
            gr.HTML(EVALUATION_DETAILS)
            show_stale.change(
-                lambda stale: leaderboard_data(model_data, scores, stale),
+                lambda stale: utils.leaderboard_data(model_data, scores, stale),
                inputs=[show_stale],
                outputs=leaderboard_table,
            )
 
        gr.LinePlot(
-            get_losses_over_time(vali_runs),
+            utils.get_losses_over_time(vali_runs),
            x="timestamp",
            x_title="Date",
            y="best_loss",
@@ -405,30 +115,7 @@ def main():
 
        with gr.Accordion("Validator Stats"):
            gr.components.Dataframe(
-                value=[
-                    [uid, int(validator_df[uid][1]), round(validator_df[uid][0], 4)]
-                    + [
-                        validator_df[uid][-1].get(c.uid)
-                        for c in model_data
-                        if c.incentive
-                    ]
-                    for uid, _ in sorted(
-                        zip(
-                            validator_df.keys(),
-                            [validator_df[x][1] for x in validator_df.keys()],
-                        ),
-                        key=lambda x: x[1],
-                        reverse=True,
-                    )
-                ],
-                headers=["UID", "Stake (τ)", "V-Trust"]
-                + [
-                    f"{c.namespace}/{c.name} ({c.commit[0:8]})"
-                    for c in model_data
-                    if c.incentive
-                ],
-                datatype=["number", "number", "number"]
-                + ["number" for c in model_data if c.incentive],
+                utils.make_validator_dataframe(validator_df, model_data),
                interactive=False,
                visible=True,
            )
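
app.py keeps the BackgroundScheduler import and restart_space() even though the scheduler wiring falls outside the hunks shown here. For orientation, this is roughly how such a periodic self-restart is wired with APScheduler; the 30-minute cadence below is an assumption, not something stated in this diff:

# Hypothetical scheduler wiring (cadence assumed; not shown in this diff).
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=60 * 30)  # restart the Space periodically
scheduler.start()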
requirements.txt CHANGED
@@ -4,5 +4,7 @@ wandb
 python-dotenv
 APScheduler
 huggingface-hub
+gradio
 pandas
+flask
 
utils.py ADDED
@@ -0,0 +1,449 @@
+
+import os
+import math
+import time
+import json
+import wandb
+import pickle
+import datetime
+import argparse
+import functools
+import traceback
+
+import pandas as pd
+import numpy as np
+import bittensor as bt
+
+from dotenv import load_dotenv
+from dataclasses import dataclass
+from typing import Dict, List, Any, Optional, Tuple
+from bittensor.extrinsics.serving import get_metadata
+
+
+NETUID = 9
+DELAY_SECS = 3
+RETRIES = 1  # 5
+
+load_dotenv()
+
+WANDB_TOKEN = os.environ.get("WANDB_API_KEY", None)
+SUBTENSOR_ENDPOINT = os.environ.get("SUBTENSOR_ENDPOINT", None)
+VALIDATOR_WANDB_PROJECT = "opentensor-dev/pretraining-subnet"
+BENCHMARK_WANDB_PROJECT = "pretraining-benchmark-data"
+BENCHMARK_FLAG = os.environ.get("BENCHMARK_FLAG", None)
+
+
+@dataclass
+class ModelData:
+    uid: int
+    hotkey: str
+    namespace: str
+    name: str
+    commit: str
+    hash: str
+    block: int
+    incentive: float
+    emission: float
+
+    @classmethod
+    def from_compressed_str(
+        cls,
+        uid: int,
+        hotkey: str,
+        cs: str,
+        block: int,
+        incentive: float,
+        emission: float,
+    ):
+        """Returns an instance of this class from a compressed string representation"""
+        tokens = cs.split(":")
+        return ModelData(
+            uid=uid,
+            hotkey=hotkey,
+            namespace=tokens[0],
+            name=tokens[1],
+            commit=tokens[2] if tokens[2] != "None" else None,
+            hash=tokens[3] if tokens[3] != "None" else None,
+            block=block,
+            incentive=incentive,
+            emission=emission,
+        )
+
+
+def run_with_retries(func, *args, **kwargs):
+    for i in range(0, RETRIES):
+        try:
+            return func(*args, **kwargs)
+        except (Exception, RuntimeError):
+            bt.logging.error(f"Failed to run function: {traceback.format_exc()}")
+            if i == RETRIES - 1:
+                raise
+            time.sleep(DELAY_SECS)
+    raise RuntimeError("Should never happen")
+
+
+def get_subtensor_and_metagraph() -> Tuple[bt.subtensor, bt.metagraph]:
+
+    def _internal() -> Tuple[bt.subtensor, bt.metagraph]:
+        if SUBTENSOR_ENDPOINT:
+            parser = argparse.ArgumentParser()
+            bt.subtensor.add_args(parser)
+            subtensor = bt.subtensor(config=bt.config(parser=parser, args=["--subtensor.chain_endpoint", SUBTENSOR_ENDPOINT]))
+        else:
+            subtensor = bt.subtensor("finney")
+
+        metagraph = subtensor.metagraph(NETUID, lite=False)
+
+        return subtensor, metagraph
+
+    return run_with_retries(_internal)
+
+
+def get_subnet_data(
+    subtensor: bt.subtensor, metagraph: bt.metagraph
+) -> List[ModelData]:
+    result = []
+    for uid in metagraph.uids.tolist():
+        hotkey = metagraph.hotkeys[uid]
+        metadata = None
+        try:
+            metadata = run_with_retries(
+                functools.partial(get_metadata, subtensor, metagraph.netuid, hotkey)
+            )
+        except:
+            print(f"Failed to get metadata for UID {uid}: {traceback.format_exc()}")
+
+        if not metadata:
+            continue
+
+        commitment = metadata["info"]["fields"][0]
+        hex_data = commitment[list(commitment.keys())[0]][2:]
+        chain_str = bytes.fromhex(hex_data).decode()
+        block = metadata["block"]
+
+        incentive = np.nan_to_num(metagraph.incentive[uid]).item()
+        emission = (
+            np.nan_to_num(metagraph.emission[uid]).item() * 20
+        )  # convert to daily TAO
+
+        model_data = None
+        try:
+            model_data = ModelData.from_compressed_str(
+                uid, hotkey, chain_str, block, incentive, emission
+            )
+        except:
+            continue
+
+        result.append(model_data)
+    return result
+
+
+def get_wandb_runs(project: str, filters: Dict[str, Any]) -> List:
+    """Get the latest runs from Wandb, retrying infinitely until we get them."""
+    while True:
+        api = wandb.Api(api_key=WANDB_TOKEN)
+        runs = list(
+            api.runs(
+                project,
+                filters=filters,
+            )
+        )
+        if len(runs) > 0:
+            return runs
+        # The wandb API is quite unreliable. Wait another minute and try again.
+        bt.logging.error("Failed to get runs from Wandb. Trying again in 60 seconds.")
+        time.sleep(60)
+
+
+def get_scores(
+    uids: List[int],
+    wandb_runs: List,
+) -> Dict[int, Dict[str, Optional[float]]]:
+    result = {}
+    previous_timestamp = None
+    # Iterate through the runs until we've processed all the uids.
+    for i, run in enumerate(wandb_runs):
+        if "original_format_json" not in run.summary:
+            continue
+        data = json.loads(run.summary["original_format_json"])
+        all_uid_data = data["uid_data"]
+        timestamp = data["timestamp"]
+        # Make sure runs are indeed in descending time order.
+        # assert (
+        #     previous_timestamp is None or timestamp < previous_timestamp
+        # ), f"Timestamps are not in descending order: {timestamp} >= {previous_timestamp}"
+        previous_timestamp = timestamp
+
+        for uid in uids:
+            if uid in result:
+                continue
+            if str(uid) in all_uid_data:
+                uid_data = all_uid_data[str(uid)]
+                # Only the most recent run is fresh.
+                is_fresh = i == 0
+                result[uid] = {
+                    "avg_loss": uid_data.get("average_loss", None),
+                    "win_rate": uid_data.get("win_rate", None),
+                    "win_total": uid_data.get("win_total", None),
+                    "weight": uid_data.get("weight", None),
+                    "fresh": is_fresh,
+                }
+        if len(result) == len(uids):
+            break
+    return result
+
+
+def get_validator_weights(
+    metagraph: bt.metagraph,
+) -> Dict[int, Tuple[float, int, Dict[int, float]]]:
+    """Returns a dictionary of validator UIDs to (vtrust, stake, {uid: weight})."""
+    ret = {}
+    for uid in metagraph.uids.tolist():
+        vtrust = metagraph.validator_trust[uid].item()
+        stake = metagraph.stake[uid].item()
+        if vtrust > 0 and stake > 10_000:
+            ret[uid] = (vtrust, stake, {})
+            for ouid in metagraph.uids.tolist():
+                if ouid == uid:
+                    continue
+                weight = round(metagraph.weights[uid][ouid].item(), 4)
+                if weight > 0:
+                    ret[uid][-1][ouid] = weight
+    return ret
+
+
+def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
+    """Returns a dataframe of the best average model loss over time."""
+    timestamps = []
+    best_losses = []
+
+    for run in wandb_runs:
+        if "original_format_json" not in run.summary:
+            continue
+        data = json.loads(run.summary["original_format_json"])
+        all_uid_data = data["uid_data"]
+        timestamp = datetime.datetime.fromtimestamp(data["timestamp"])
+        best_loss = math.inf
+        for _, uid_data in all_uid_data.items():
+            loss = uid_data.get("average_loss", math.inf)
+            # Filter out the numbers from the exploit and when validators lost the best model.
+            if loss < best_loss and (loss > 2.5 or timestamp > datetime.datetime(2024, 2, 12)) and (loss < 5 or timestamp > datetime.datetime(2024, 3, 27)):
+                best_loss = uid_data["average_loss"]
+        if best_loss != math.inf:
+            timestamps.append(timestamp)
+            best_losses.append(best_loss)
+
+    return pd.DataFrame({"timestamp": timestamps, "best_loss": best_losses})
+
+
+def next_epoch(subtensor: bt.subtensor, block: int) -> int:
+    return (
+        block
+        + subtensor.get_subnet_hyperparameters(NETUID).tempo
+        - subtensor.blocks_since_epoch(NETUID, block)
+    )
+
+
+def is_floatable(x) -> bool:
+    return (
+        isinstance(x, float) and not math.isnan(x) and not math.isinf(x)
+    ) or isinstance(x, int)
+
+
+def format_score(uid: int, scores, key) -> Optional[float]:
+    if uid in scores:
+        if key in scores[uid]:
+            point = scores[uid][key]
+            if is_floatable(point):
+                return round(scores[uid][key], 4)
+    return None
+
+
+def leaderboard_data(
+    leaderboard: List[ModelData],
+    scores: Dict[int, Dict[str, Optional[float]]],
+    show_stale: bool,
+) -> List[List[Any]]:
+    """Returns the leaderboard data, based on models data and UID scores."""
+    return [
+        [
+            f"[{c.namespace}/{c.name} ({c.commit[0:8]})](https://huggingface.co/{c.namespace}/{c.name}/commit/{c.commit})",
+            format_score(c.uid, scores, "win_rate"),
+            format_score(c.uid, scores, "avg_loss"),
+            format_score(c.uid, scores, "weight"),
+            c.uid,
+            c.block,
+        ]
+        for c in leaderboard
+        if (c.uid in scores and scores[c.uid]["fresh"]) or show_stale
+    ]
+
+
+def get_benchmarks() -> Tuple[pd.DataFrame, datetime.datetime]:
+    """Returns the latest benchmarks and the time they were run."""
+    if not BENCHMARK_WANDB_PROJECT:
+        bt.logging.error("No benchmark project set.")
+        return None, None
+    runs = get_wandb_runs(project=BENCHMARK_WANDB_PROJECT, filters=None)
+    for run in runs:
+        artifacts = list(run.logged_artifacts())
+        if artifacts:
+            table = artifacts[-1].get("benchmarks")
+            if table:
+                return table.get_dataframe(), datetime.datetime.strptime(run.metadata["startedAt"], "%Y-%m-%dT%H:%M:%S.%f")
+    bt.logging.error("Failed to get benchmarks from Wandb.")
+    return None, None
+
+
+def make_validator_dataframe(validator_df: Dict[int, Tuple[float, int, Dict[int, float]]], model_data: List[ModelData]) -> pd.DataFrame:
+
+    values = [
+        [uid, int(validator_df[uid][1]), round(validator_df[uid][0], 4)]
+        + [
+            validator_df[uid][-1].get(c.uid)
+            for c in model_data
+            if c.incentive
+        ]
+        for uid, _ in sorted(
+            zip(
+                validator_df.keys(),
+                [validator_df[x][1] for x in validator_df.keys()],
+            ),
+            key=lambda x: x[1],
+            reverse=True,
+        )
+    ]
+    dtypes = {"UID": int, "Stake (τ)": float, "V-Trust": float}
+    dtypes.update({
+        f"{c.namespace}/{c.name} ({c.commit[0:8]})": float
+        for c in model_data
+        if c.incentive
+    })
+    return pd.DataFrame(values, columns=list(dtypes.keys())).astype(dtypes)
+
+
+def make_metagraph_dataframe(metagraph: bt.metagraph, weights=False) -> pd.DataFrame:
+
+    cols = ['stake', 'emission', 'trust', 'validator_trust', 'dividends', 'incentive', 'R', 'consensus', 'validator_permit']
+
+    frame = pd.DataFrame({k: getattr(metagraph, k) for k in cols})
+    frame['block'] = metagraph.block.item()
+    frame['netuid'] = NETUID
+    frame['uid'] = range(len(frame))
+    frame['hotkey'] = [axon.hotkey for axon in metagraph.axons]
+    frame['coldkey'] = [axon.coldkey for axon in metagraph.axons]
+    if weights and metagraph.W is not None:
+        # convert NxN tensor to a list of lists so it fits into the dataframe
+        frame['weights'] = [w.tolist() for w in metagraph.W]
+
+    return frame
+
+
+def load_state_vars() -> Dict[str, Any]:
+    while True:
+        try:
+            subtensor, metagraph = get_subtensor_and_metagraph()
+
+            bt.logging.success("Loaded subtensor and metagraph")
+
+            model_data: List[ModelData] = get_subnet_data(subtensor, metagraph)
+            model_data.sort(key=lambda x: x.incentive, reverse=True)
+
+            bt.logging.success(f'Loaded {len(model_data)} models')
+            vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})
+
+            scores = get_scores([x.uid for x in model_data], vali_runs)
+
+            # TODO: Re-enable once ""SubtensorModule.BlocksSinceEpoch" not found" issue is resolved.
+            # current_block = metagraph.block.item()
+            # next_epoch_block = next_epoch(subtensor, current_block)
+
+            validator_df = get_validator_weights(metagraph)
+            weight_keys = set()
+            for uid, stats in validator_df.items():
+                weight_keys.update(stats[-1].keys())
+
+            # TODO: re-enable benchmarks
+            # Enable benchmark if the flag is set
+            if BENCHMARK_FLAG:
+                benchmarks, benchmark_timestamp = get_benchmarks()
+            else:
+                benchmarks, benchmark_timestamp = None, None
+            break
+
+        except KeyboardInterrupt:
+            bt.logging.error("Exiting...")
+            break
+
+        except Exception:
+            print(f"Failed to get data: {traceback.format_exc()}")
+            time.sleep(30)
+
+    return {
+        'metagraph': metagraph,
+        "model_data": model_data,
+        "vali_runs": vali_runs,
+        "scores": scores,
+        "validator_df": validator_df,
+        "benchmarks": benchmarks,
+        "benchmark_timestamp": benchmark_timestamp
+    }
+
+
+def test_load_state_vars():
+
+    subtensor = bt.subtensor("finney")
+    metagraph = subtensor.metagraph(NETUID, lite=True)
+    model_data = [
+        ModelData(uid=253, hotkey='5DjoPAgZ54Zf6NsuiVYh8RjonnWWWREE2iXBNzM2VDBMQDPm', namespace='jw-hf-test', name='jw2', commit='aad131f6b02219964e6dcf749c2a23e75a7ceca8', hash='L1ImYzWJwV+9KSnZ2TYW0Iy2KMcVjJVTd30YJoRkpbw=', block=3131103, incentive=1.0, emission=209.06051635742188),
+        ModelData(uid=1, hotkey='5CccVtjk4yamCao6QYgEg7jc8vktdj16RbLKNUftHfEsjuJS', namespace='borggAI', name='bittensor-subnet9-models', commit='d373864bc6c972872edb8db95eed570958054bac', hash='+drdTIKYEGYClW2FFVVID6A2Dh//4rLmExRFCJsH6Y4=', block=2081837, incentive=0.0, emission=0.0),
+        ModelData(uid=2, hotkey='5HYwoXaczs3jAptbb5mk4aUCkgZqeNcNzJKxSec97GwasfLy', namespace='jungiebeen', name='pretrain1', commit='4c0c6bfd0f92e243d6c8a82209142e7204c852c3', hash='ld/agc0XIWICom/Cpj0fkQLcMogMNj/F65MJogK5RLY=', block=2467482, incentive=0.0, emission=0.0),
+        ModelData(uid=3, hotkey='5Dnb6edh9yTeEp5aasRPZVPRAkxvQ6qnERVcXw22awMZ5rxm', namespace='jungiebeen', name='pretrain2', commit='e827b7281c92224adb11124489cc45356553a87a', hash='ld/agc0XIWICom/Cpj0fkQLcMogMNj/F65MJogK5RLY=', block=2467497, incentive=0.0, emission=0.0),
+        ModelData(uid=4, hotkey='5FRfca8NbnH424WaX43PMhKBnbLA1bZpRRoXXiVs6HgsxN4K', namespace='ZainAli60', name='mine_modeles', commit='8a4ed4ad1f1fb58d424fd22e8e9874b87d32917c', hash='tVcbZAFoNIOF+Ntxq31OQ2NrLXf5iFCmmPUJlpkMYYo=', block=2508509, incentive=0.0, emission=0.0)
+    ]
+    vali_runs = get_wandb_runs(project=VALIDATOR_WANDB_PROJECT, filters={"config.type": "validator", "config.uid": 238})
+
+    scores = get_scores([x.uid for x in model_data], vali_runs)
+
+    validator_df = {
+        28: (1.0, 33273.4453125, {253: 1.0}),
+        49: (0.9127794504165649,
+             10401.677734375,
+             {7: 0.0867,
+              217: 0.0001,
+              219: 0.0001,
+              241: 0.0001,
+              248: 0.0001,
+              253: 0.9128}),
+        78: (1.0, 26730.37109375, {253: 1.0}),
+        116: (1.0, 629248.4375, {253: 1.0}),
+        150: (1.0, 272634.53125, {253: 1.0}),
+        161: (1.0, 280212.53125, {253: 1.0}),
+        180: (1.0, 16838.0, {253: 1.0}),
+        184: (1.0, 47969.3984375, {253: 1.0}),
+        210: (1.0, 262846.28125, {253: 1.0}),
+        213: (1.0, 119462.734375, {253: 1.0}),
+        215: (1.0, 274747.46875, {253: 1.0}),
+        234: (1.0, 38831.6953125, {253: 1.0}),
+        236: (1.0, 183966.9375, {253: 1.0}),
+        238: (1.0, 1293707.25, {253: 1.0}),
+        240: (1.0, 106461.6015625, {253: 1.0}),
+        243: (1.0, 320271.5, {253: 1.0}),
+        244: (1.0, 116138.9609375, {253: 1.0}),
+        247: (0.9527428150177002, 119812.390625, {7: 0.0472, 253: 0.9528}),
+        249: (1.0, 478127.3125, {253: 1.0}),
+        252: (1.0, 442395.03125, {253: 1.0}),
+        254: (1.0, 46845.2109375, {253: 1.0}),
+        255: (1.0, 28977.56640625, {253: 1.0})
+    }
+
+    return {
+        'metagraph': metagraph,
+        "model_data": model_data,
+        "vali_runs": vali_runs,
+        "scores": scores,
+        "validator_df": validator_df,
+    }
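
To make the commitment format concrete: get_subnet_data hex-decodes each on-chain commitment into a colon-separated string, and ModelData.from_compressed_str splits it into namespace, name, commit, and hash. A small sketch built from the first fixture entry in test_load_state_vars above:

from utils import ModelData

# namespace:name:commit:hash, joined the way the chain commitment is decoded
# (all values taken from the test fixture above).
cs = ":".join([
    "jw-hf-test",
    "jw2",
    "aad131f6b02219964e6dcf749c2a23e75a7ceca8",
    "L1ImYzWJwV+9KSnZ2TYW0Iy2KMcVjJVTd30YJoRkpbw=",
])
model = ModelData.from_compressed_str(
    uid=253,
    hotkey="5DjoPAgZ54Zf6NsuiVYh8RjonnWWWREE2iXBNzM2VDBMQDPm",
    cs=cs,
    block=3131103,
    incentive=1.0,
    emission=209.06051635742188,
)
assert model.namespace == "jw-hf-test"
assert model.commit.startswith("aad131f")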