|
import gradio as gr |
|
import pandas as pd |
|
import json |
|
import os |
|
|
|
|
|
# MTEB clustering task names. Each model is expected to have a
# results/<model>/<task>.json file whose test split reports a "v_measure"
# metric (see compute_model_score).
TASKS_CLUSTERING = [

    "ArxivClusteringP2P",

    "ArxivClusteringS2S",

    "BiorxivClusteringP2P",

    "BiorxivClusteringS2S",

    "MedrxivClusteringP2P",

    "MedrxivClusteringS2S",

    "RedditClustering",

    "RedditClusteringP2P",

    "StackExchangeClustering",

    "StackExchangeClusteringP2P",

    "TwentyNewsgroupsClustering",

]
|
|
|
# MTEB pair-classification task names. Each model is expected to have a
# results/<model>/<task>.json file whose test split reports a max average
# precision under ["max"]["ap"] (see compute_model_score).
TASKS_PAIR_CLASSIFICATION = [

    "SprintDuplicateQuestions",

    "TwitterSemEval2015",

    "TwitterURLCorpus",

]
|
|
|
|
|
# Base model names to include in the table. For each entry, both the fp32
# variant ("<name>") and the 8-bit-quantized variant ("<name>-q8") must exist
# under models/ and results/ (see DATA below).
MODELS = [

    "all-MiniLM-L6-v2"

]
|
|
|
|
|
def get_model_size(model_name):
    """Return the size of a model's weight file, in megabytes.

    Reads ``models/<model_name>/pytorch_model.bin`` relative to the current
    working directory. NOTE(review): divides by 1024**2, so this is strictly
    MiB even though callers label the column "MB".

    Raises:
        OSError: if the weight file does not exist.
    """
    weights_path = os.path.join("models", model_name, "pytorch_model.bin")
    size_bytes = os.path.getsize(weights_path)
    return size_bytes / (1024.0 * 1024.0)
|
|
|
|
|
def compute_model_score(model_name):
    """Return the unweighted mean benchmark score for *model_name*.

    For every clustering task the test-split ``v_measure`` is used; for every
    pair-classification task the test-split ``max.ap`` is used. Results are
    read from ``results/<model_name>/<task>.json``.

    Raises:
        OSError: if a task result file is missing.
        KeyError: if a result file lacks the expected metric keys.
    """
    model_dir = os.path.join("results", model_name)

    def _load_metric(task_name, extract):
        # Read one task's result file and pull out its headline test metric.
        result_path = os.path.join(model_dir, f"{task_name}.json")
        with open(result_path, "r") as fh:
            return extract(json.load(fh))

    scores = [
        _load_metric(task, lambda d: d["test"]["v_measure"])
        for task in TASKS_CLUSTERING
    ]
    scores += [
        _load_metric(task, lambda d: d["test"]["max"]["ap"])
        for task in TASKS_PAIR_CLASSIFICATION
    ]

    return sum(scores) / len(scores)
|
|
|
|
|
# Leaderboard table: one row per base model, comparing the fp32 variant with
# its 8-bit-quantized ("-q8") counterpart on size and benchmark score.
DATA = {
    "Model": MODELS,
    "Model Size (MB)": [
        get_model_size(model) for model in MODELS
    ],
    # Fix: this was a hard-coded placeholder [5]; compute the real fp32 score
    # so the column is comparable with "q8 Score" below.
    "Score": [
        compute_model_score(model) for model in MODELS
    ],
    "q8 Model Size (MB)": [
        get_model_size(model + "-q8") for model in MODELS
    ],
    "q8 Score": [
        compute_model_score(model + "-q8") for model in MODELS
    ],
}
|
|
|
# Persist the leaderboard table so the app can load it later without
# recomputing scores.
serialized = json.dumps(DATA)
with open("data.json", "w") as json_file:
    json_file.write(serialized)
|
|
|
|
|
|
|
|