File size: 4,353 Bytes
aeaa0ca 44d4ff3 886a109 44d4ff3 886a109 44d4ff3 5b15f5e 376d3eb bd5abdb bfa6f44 bd5abdb 4a498d2 e90ffc7 5128d5f e90ffc7 aeaa0ca bfa6f44 376d3eb 3ebc784 5a2b3ee 3ebc784 4afa91d 2f02c91 b9d3833 3ebc784 886a109 4afa91d 3ebc784 13ff3a0 376d3eb 4afa91d 2f02c91 4a498d2 5b15f5e 886a109 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import pandas as pd
def add_model_readme(df):
    """Add the model ids referenced in ``df["Links"]`` to ``README.md``.

    Each link is expected to look like ``https://huggingface.co/<org>/<name>``;
    the part after ``.co/`` is the model id and is written as a ``- <id>``
    list item. Ids already present in the file are not added again.

    New items are inserted immediately before the last line of the file
    (presumably the closing ``---`` of the front matter -- confirm against
    the actual README layout).

    Args:
        df: DataFrame with a ``"Links"`` column. Rows whose link is missing
            (None/NaN) or does not contain ``.co/`` are skipped instead of
            aborting the whole update.
    """
    # Explicit UTF-8 so the result does not depend on the platform's locale.
    with open("README.md", "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Set of existing lines gives O(1) duplicate checks while `lines`
    # keeps the file order.
    known = set(lines)
    for link in df["Links"]:
        # Models without a known link come through as None/NaN.
        if not isinstance(link, str):
            continue
        _, sep, model_id = link.partition(".co/")
        if not sep or not model_id:
            # Not a hub URL (e.g. the literal string "nan"): nothing to add.
            continue
        entry = f"- {model_id}\n"
        if entry in known:
            continue
        # Keep the file's final line last by inserting just before it.
        lines.insert(-1, entry)
        known.add(entry)

    with open("README.md", "w", encoding="utf-8") as f:
        f.writelines(lines)
# Load the raw per-model scores. Everything below addresses columns by
# position, so it depends on the column order of this CSV and on the order
# of the insert() calls that follow.
df = pd.read_csv("data/raw_scores.csv")
COLS = df.columns.to_list()
# Append a "models_query" column (a copy of "Models") after the last
# original column.
df.insert(len(COLS), "models_query", df["Models"])
print(f"all cols {df.columns.to_list()}")
# Average score: row-wise mean over the columns from position 5 up to, but
# not including, the last three (presumably the per-benchmark score
# columns -- confirm against the CSV header).
mean_columns = df.iloc[:,5:-3]
# Show which columns went into the average.
print("cols", mean_columns.columns.to_list())
# NOTE(review): the insertion position is the *number* of averaged columns,
# not an index derived from a column name -- confirm it lands where intended.
df.insert(len(mean_columns.columns.to_list()), "Average score", mean_columns.mean(axis=1).round(2))
# Win rate: add a temporary "<col> rank" column for every column at
# positions 6..-2 (presumably the per-language scores -- confirm).
# `old_size` marks where the temporary columns start.
old_size = len(df.columns)
for col in df.columns[6:-2]:
    # Descending rank: the highest value in the column gets rank 1 ...
    df[col + " rank"] = df[col].rank(ascending=False)
    # ... then flip it so the highest value gets len(df) and larger is better.
    df[col + " rank"] = len(df) - (df[col + " rank"] - 1)
# "Win Rate" is the row-wise mean of the flipped rank columns, rounded to
# two decimals (an average rank score, not a fraction between 0 and 1).
df["Win Rate"] = df.iloc[:, old_size:].mean(axis=1).round(2)
# Drop the temporary rank columns; the last column ("Win Rate") is kept.
df = df.drop(df.columns[old_size:-1], axis=1)
# Put "Models", "Size (B)" and "Win Rate" first, followed by the columns at
# positions 2..-1. Assumes "Models" and "Size (B)" are the first two
# columns -- TODO confirm.
df = df[["Models", "Size (B)", "Win Rate"] + df.columns[2:-1].tolist()]
# Sort rows by win rate, best first.
df = df.sort_values(by=["Win Rate"], ascending=False)
# Attach a "Links" column holding the Hugging Face model page of each model.
# The CodeLlama variants all follow the same URL scheme
# (https://huggingface.co/codellama/<name>-hf), so their entries are
# generated from the list of names instead of being spelled out.
codellamas = ['CodeLlama-7b', 'CodeLlama-7b-Python', 'CodeLlama-7b-Instruct', 'CodeLlama-13b', 'CodeLlama-13b-Python', 'CodeLlama-13b-Instruct', 'CodeLlama-34b', 'CodeLlama-34b-Python', 'CodeLlama-34b-Instruct']
links = {
    "WizardCoder-15B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-15B-V1.0",
    "WizardCoder-3B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-3B-V1.0",
    "WizardCoder-1B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-1B-V1.0",
    "WizardCoder-Python-34B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0",
    "WizardCoder-Python-13B-V1.0": "https://huggingface.co/WizardLM/WizardCoder-Python-13B-V1.0",
    "OctoCoder-15B": "https://huggingface.co/bigcode/octocoder",
    "OctoGeeX-7B": "https://huggingface.co/bigcode/octogeex",
    "StableCode-3B": "https://huggingface.co/stabilityai/stablecode-completion-alpha-3b",
    "StarCoder-15B": "https://huggingface.co/bigcode/starcoder",
    "StarCoderBase-15B": "https://huggingface.co/bigcode/starcoderbase",
    "StarCoderBase-7B": "https://huggingface.co/bigcode/starcoderbase-7b",
    "StarCoderBase-3B": "https://huggingface.co/bigcode/starcoderbase-3b",
    "StarCoderBase-1.1B": "https://huggingface.co/bigcode/starcoderbase-1b",
    "SantaCoder-1.1B": "https://huggingface.co/bigcode/santacoder",
    "Replit-2.7B": "https://huggingface.co/replit/replit-code-v1-3b",
    "CodeGeex2-6B": "https://huggingface.co/THUDM/codegeex2-6b",
    "CodeGen25-7B-multi": "https://huggingface.co/Salesforce/codegen25-7b-multi",
    "CodeGen25-7B-mono": "https://huggingface.co/Salesforce/codegen25-7b-mono",
    "CodeGen-16B-Multi": "https://huggingface.co/Salesforce/codegen-16B-multi",
    "DeciCoder-1B": "https://huggingface.co/Deci/DeciCoder-1b",
    "Phind-CodeLlama-34B-v1": "https://huggingface.co/phind/Phind-CodeLlama-34B-v1",
    "Phind-CodeLlama-34B-Python-v1": "https://huggingface.co/phind/Phind-CodeLlama-34B-Python-v1",
    "Phind-CodeLlama-34B-v2": "https://huggingface.co/phind/Phind-CodeLlama-34B-v2",
}
links.update(
    {
        codellama: f"https://huggingface.co/codellama/{codellama}-hf"
        for codellama in codellamas
    }
)
# Models that have no entry in `links` get a missing value in "Links".
df["Links"] = df["Models"].map(links)
# Prepend a marker column "T": every row starts as 🟢, and rows whose model
# name contains any of the substrings below are switched to 🔶. The
# substrings are joined with "|" so that a single regex search covers all.
patterns = ["WizardCoder", "Octo", "Instruct", "Phind"]
df.insert(0, "T", "🟢")
matches_pattern = df["Models"].str.contains('|'.join(patterns))
df.loc[matches_pattern, "T"] = "🔶"

# Preview the first five rows, showing every column except the last one.
print(df.iloc[:5, :-1])

# Write the finished board without the index column.
df.to_csv("data/code_eval_board.csv", index=False)

# Register the listed models in README.md.
add_model_readme(df)
print("Readme filled")