File size: 8,463 Bytes
aeaa0ca 6b3d762 aeaa0ca 44d4ff3 f55aa6a 44d4ff3 5b15f5e 376d3eb c47d747 bd5abdb bfa6f44 bd5abdb 4a498d2 e90ffc7 5128d5f e90ffc7 c47d747 aeaa0ca bfa6f44 376d3eb 3ebc784 5a2b3ee 3ebc784 4afa91d 2f02c91 48c5fca 0131ab9 3ebc784 886a109 4afa91d d775509 f55aa6a e056b5e 6df95f1 6b3d762 e3e355a 08e5a25 118d3e1 5b90223 c47d747 48c5fca cb2e5cf b2c4485 c99e6b7 d242132 3ebc784 d775509 c946bf3 13ff3a0 c47d747 376d3eb d242132 c47d747 c99e6b7 6df95f1 c47d747 48c5fca cb2e5cf b2c4485 6b3d762 2f02c91 4a498d2 5b15f5e 886a109 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import pandas as pd
from utils import model_hyperlink
def add_model_readme(df):
# write model ids to README.md
with open("README.md", "r") as f:
lines = f.readlines()
links = df["Links"].astype(str)
for link in links:
try:
model_id = link.split(".co/")[1]
# verify line doesn't exist
if f"- {model_id}\n" in lines:
continue
lines.insert(-1, f"- {model_id}\n")
except IndexError:
print(f"link {link} is not valid")
with open("README.md", "w") as f:
f.writelines(lines)
df = pd.read_csv("data/raw_scores.csv")
COLS = df.columns.to_list()
# add column models_query with same values a smodels at the end of columns
df.insert(len(COLS), "models_query", df["Model"])
print(f"all cols {df.columns.to_list()}")
# average score
mean_columns = df.iloc[:,5:-3]
# print cols in mean_columns
print("cols", mean_columns.columns.to_list())
df.insert(len(mean_columns.columns.to_list()), "Average score", mean_columns.mean(axis=1).round(2))
# add win rate columns for each language
old_size = len(df.columns)
for col in df.columns[6:-2]:
df[col + " rank"] = df[col].rank(ascending=False)
df[col + " rank"] = len(df) - (df[col + " rank"] - 1)
df["Win Rate"] = df.iloc[:, old_size:].mean(axis=1).round(2)
df = df.drop(df.columns[old_size:-1], axis=1)
df = df[["Model", "Size (B)", "Win Rate"] + df.columns[2:-1].tolist()]
# sort with regard to column win rate
df = df.sort_values(by=["Win Rate"], ascending=False)
# add column with model links as https://huggingface.co/WizardLM/WizardCoder-15B-V1.0, https://huggingface.co/bigcode/starcoder, https://huggingface.co/bigcode/starcoderbase, https://huggingface.co/bigcode/starcoderbase-7b,
# https://huggingface.co/bigcode/starcoderbase-3b, https://huggingface.co/bigcode/starcoderbase-1b, https://huggingface.co/bigcode/santacoder, https://huggingface.co/replit/replit-code-v1-3b, https://huggingface.co/THUDM/codegeex2-6b
links = {
"WizardCoder-15B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-15B-V1.0",
"WizardCoder-3B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-3B-V1.0",
"WizardCoder-1B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-1B-V1.0",
"WizardCoder-Python-34B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0",
"WizardCoder-Python-13B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-Python-13B-V1.0",
"OctoCoder-15B": "https://huggingface.co/bigcode/octocoder",
"OctoGeeX-7B": "https://huggingface.co/bigcode/octogeex",
"StableCode-3B-alpha": "https://huggingface.co/stabilityai/stablecode-completion-alpha-3b",
"StarCoder2-15B": "https://huggingface.co/bigcode/starcoder2-15b",
"StarCoder2-7B": "https://huggingface.co/bigcode/starcoder2-7b",
"StarCoder2-3B": "https://huggingface.co/bigcode/starcoder2-3b",
"StarCoder-15B": "https://huggingface.co/bigcode/starcoder",
"StarCoderBase-15B": "https://huggingface.co/bigcode/starcoderbase",
"StarCoderBase-7B": "https://huggingface.co/bigcode/starcoderbase-7b",
"StarCoderBase-3B": "https://huggingface.co/bigcode/starcoderbase-3b",
"StarCoderBase-1.1B": "https://huggingface.co/bigcode/starcoderbase-1b",
"SantaCoder-1.1B": "https://huggingface.co/bigcode/santacoder",
"Replit-2.7B": "https://huggingface.co/replit/replit-code-v1-3b",
"CodeGeex2-6B": "https://huggingface.co/THUDM/codegeex2-6b",
"CodeGen25-7B-multi": "https://huggingface.co/Salesforce/codegen25-7b-multi",
"CodeGen25-7B-mono": "https://huggingface.co/Salesforce/codegen25-7b-mono",
"CodeGen-16B-Multi": "https://huggingface.co/Salesforce/codegen-16B-multi",
"DeciCoder-1B": "https://huggingface.co/Deci/DeciCoder-1b",
"Phind-CodeLlama-34B-v1": "https://huggingface.co/phind/Phind-CodeLlama-34B-v1",
"Phind-CodeLlama-34B-Python-v1": "https://huggingface.co/phind/Phind-CodeLlama-34B-Python-v1",
"Phind-CodeLlama-34B-v2": "https://huggingface.co/phind/Phind-CodeLlama-34B-v2",
"Falcon-180B": "https://huggingface.co/tiiuae/falcon-180B",
"Refact-1.6B": "https://huggingface.co/smallcloudai/Refact-1_6B-fim",
"Phi-1": "https://huggingface.co/microsoft/phi-1",
"CodeShell-7B": "https://huggingface.co/WisdomShell/CodeShell-7B",
"DeepSeek-Coder-1b-base": "https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base",
"DeepSeek-Coder-7b-base": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base",
"DeepSeek-Coder-33b-base": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-base",
"DeepSeek-Coder-7b-instruct": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
"DeepSeek-Coder-33b-instruct": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
"CodeFuse-DeepSeek-33b": "https://huggingface.co/codefuse-ai/CodeFuse-DeepSeek-33B",
"Stable-code-3b": "https://huggingface.co/stabilityai/stable-code-3b",
"OpenCodeInterpreter-DS-33B": "https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-33B",
"OpenCodeInterpreter-DS-6.7B": "https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-6.7B",
"CodeGemma-2B": "https://huggingface.co/google/codegemma-2b",
"CodeGemma-7B": "https://huggingface.co/google/codegemma-7b",
"CodeGemma-7B-it": "https://huggingface.co/google/codegemma-7b-it",
"CodeQwen1.5-7B": "https://huggingface.co/Qwen/CodeQwen1.5-7B",
"CodeQwen1.5-7B-Chat": "https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat",
}
codellamas = ['CodeLlama-7b', 'CodeLlama-7b-Python', 'CodeLlama-7b-Instruct', 'CodeLlama-13b', 'CodeLlama-13b-Python', 'CodeLlama-13b-Instruct', 'CodeLlama-34b', 'CodeLlama-34b-Python', 'CodeLlama-34b-Instruct', 'CodeLlama-70b', 'CodeLlama-70b-Python', 'CodeLlama-70b-Instruct']
for codellama in codellamas:
links[codellama] = f"https://huggingface.co/codellama/{codellama}-hf"
df["Links"] = df["Model"].map(links)
df.insert(0, "T", "🟢")
patterns = ["WizardCoder", "Octo", "Instruct", "Phind", "Refact", "CodeGemma-7B-it", "Chat"]
df.loc[df["Model"].str.contains('|'.join(patterns)), "T"] = "🔶"
df.loc[df["Model"].str.contains('|'.join(patterns)), "T"] = "🔶"
df.loc[df["Model"].str.contains('|'.join(["CodeShell", "DeepSeek-Coder-7b-instruct", "DeepSeek-Coder-33b-instruct", "CodeFuse", "OpenCodeInterpreter"])), "T"] = "🔶 EXT"
df.loc[df["Model"].str.contains('|'.join(["Stable-Code-3b", "DeepSeek-Coder-1b-base", "DeepSeek-Coder-33b-base", "DeepSeek-Coder-7b-base"])), "T"] = "🟢 EXT"
# add clumn submission_pr with empty fiels except for CodeShell with link AA
df["Submission PR"] = ""
df.loc[df["Model"].str.contains('|'.join(["CodeShell"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/16"
df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-1b-base"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/33"
df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-7b-base"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/32"
df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-33b-base"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/31"
df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-7b-instruct"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/43"
df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-33b-instruct"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/42"
df.loc[df["Model"].str.contains('|'.join(["CodeFuse"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/51"
df.loc[df["Model"].str.contains('|'.join(["Stable-code-3b"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/57"
df.loc[df["Model"].str.contains('|'.join(["OpenCodeInterpreter-DS-33B"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/60"
df.loc[df["Model"].str.contains('|'.join(["OpenCodeInterpreter-DS-6.7B"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/61"
# print first 5 rows and 10 cols
print(df.iloc[:5, :-1])
df.to_csv("data/code_eval_board.csv", index=False)
# fill readme
add_model_readme(df)
print("Readme filled") |