# Spaces:
# Running
# Running
import pandas as pd

# Per-benchmark column configuration.
#
# For each benchmark prefix (SWEBENCH, USACO, COREBENCH, MLAGENTBENCH,
# CYBENCH, APPWORLD, GAIA) this module defines three lists of column names:
#   <PREFIX>_ON_LOAD_COLUMNS, <PREFIX>_SEARCH_COLUMNS, <PREFIX>_HIDE_COLUMNS.
# NOTE(review): presumably these drive which table columns are shown
# initially, searchable, and hidden -- confirm against the code that
# consumes them (not visible in this section).

# NOTE(review): three entries, while most *_ON_LOAD_COLUMNS lists have four
# or more; presumably column datatypes -- confirm the consumer does not rely
# on a one-to-one match with the displayed columns.
TYPES = [
    "str",
    "number",
    "number",
]

# Building blocks for the *_HIDE_COLUMNS lists below.  They are tuples so a
# caller mutating one public list can never affect another; every public
# list is a freshly built ``list`` with exactly the same contents and order
# as the original hand-written literal.
_METRIC_HIDE_COLUMNS = (
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
)
# Score columns hidden for every benchmark except MLAGENTBENCH.
_SCORE_HIDE_COLUMNS = (
    "Overall Score",
    "Vectorization Score",
    "Fathomnet Score",
    "Feedback Score",
    "House Price Score",
    "Spaceship Titanic Score",
    "AMP Parkinsons Disease Progression Prediction Score",
    "CIFAR10 Score",
    "IMDB Score",
)
# Per-level accuracy columns: shown on load for GAIA, hidden for the other
# accuracy-based benchmarks.
_LEVEL_ACCURACY_COLUMNS = (
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
)

SWEBENCH_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
]
SWEBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
SWEBENCH_HIDE_COLUMNS = [
    *_METRIC_HIDE_COLUMNS,
    *_SCORE_HIDE_COLUMNS,
    *_LEVEL_ACCURACY_COLUMNS,
]

USACO_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
]
USACO_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
USACO_HIDE_COLUMNS = [
    *_METRIC_HIDE_COLUMNS,
    *_SCORE_HIDE_COLUMNS,
    *_LEVEL_ACCURACY_COLUMNS,
]

COREBENCH_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
]
COREBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
COREBENCH_HIDE_COLUMNS = [
    *_METRIC_HIDE_COLUMNS,
    *_SCORE_HIDE_COLUMNS,
    *_LEVEL_ACCURACY_COLUMNS,
]

# MLAGENTBENCH is the one benchmark that loads "Overall Score" instead of
# "Accuracy" (and hides "Accuracy" instead of the score columns).
MLAGENTBENCH_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Overall Score",
    "Total Cost",
]
MLAGENTBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
MLAGENTBENCH_HIDE_COLUMNS = [*_METRIC_HIDE_COLUMNS, "Accuracy"]

# Bucket label -> right-closed numeric interval, i.e. (left, right].
# The buckets are contiguous and together cover (-1, 10000].
# NOTE(review): not referenced anywhere in this section and the unit of the
# bounds is not stated -- confirm whether it is still used.
NUMERIC_INTERVALS = {
    "?": pd.Interval(-1, 0, closed="right"),
    "~1.5": pd.Interval(0, 2, closed="right"),
    "~3": pd.Interval(2, 4, closed="right"),
    "~7": pd.Interval(4, 9, closed="right"),
    "~13": pd.Interval(9, 20, closed="right"),
    "~35": pd.Interval(20, 45, closed="right"),
    "~60": pd.Interval(45, 70, closed="right"),
    "70+": pd.Interval(70, 10000, closed="right"),
}

CYBENCH_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
]
CYBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
CYBENCH_HIDE_COLUMNS = [
    *_METRIC_HIDE_COLUMNS,
    *_SCORE_HIDE_COLUMNS,
    *_LEVEL_ACCURACY_COLUMNS,
]

APPWORLD_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
    "Scenario Goal Completion",
]
APPWORLD_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
APPWORLD_HIDE_COLUMNS = [
    *_METRIC_HIDE_COLUMNS,
    *_SCORE_HIDE_COLUMNS,
    *_LEVEL_ACCURACY_COLUMNS,
]

# GAIA shows the per-level accuracy columns on load, so they are left out of
# its hide list.
GAIA_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    *_LEVEL_ACCURACY_COLUMNS,
    "Total Cost",
    "Runs",
]
GAIA_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]
GAIA_HIDE_COLUMNS = [*_METRIC_HIDE_COLUMNS, *_SCORE_HIDE_COLUMNS]