import gradio as gr
import pandas as pd
import json
import os
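
# MTEB clustering tasks; each results/<model>/<task>.json stores a test-split v-measure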
TASKS_CLUSTERING = [
    "ArxivClusteringP2P",
    "ArxivClusteringS2S",
    "BiorxivClusteringP2P",
    "BiorxivClusteringS2S",
    "MedrxivClusteringP2P",
    "MedrxivClusteringS2S",
    "RedditClustering",
    "RedditClusteringP2P",
    "StackExchangeClustering",
    "StackExchangeClusteringP2P",
    "TwentyNewsgroupsClustering",
]
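
# MTEB pair classification tasks, scored by max average precision (AP)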
TASKS_PAIR_CLASSIFICATION = [
    "SprintDuplicateQuestions",
    "TwitterSemEval2015",
    "TwitterURLCorpus",
]
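
# Models to evaluate; each needs a models/<name>/ checkpoint and a results/<name>/ directory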
MODELS = [
    "all-MiniLM-L6-v2",
]


def get_model_size(model_name):
    """Return the size of a model's pytorch_model.bin in megabytes."""
    return os.path.getsize(f"models/{model_name}/pytorch_model.bin") / (1024.0 * 1024.0)


def compute_model_score(model_name):
    """Average a model's MTEB scores: v-measure for the clustering tasks,
    max average precision for the pair classification tasks."""
    results_dir = "results"
    model_dir = os.path.join(results_dir, model_name)
    scores = []
    # Get scores for clustering tasks
    for task in TASKS_CLUSTERING:
        task_file = os.path.join(model_dir, f"{task}.json")
        with open(task_file, 'r') as f:
            data = json.load(f)
            v_measure = data['test']['v_measure']
            scores.append(v_measure)
    # Get scores for pair classification tasks
    for task in TASKS_PAIR_CLASSIFICATION:
        task_file = os.path.join(model_dir, f"{task}.json")
        with open(task_file, 'r') as f:
            data = json.load(f)
            max_ap = data['test']['max']['ap']
            scores.append(max_ap)
    # Compute average score over all tasks
    average_score = sum(scores) / len(scores)
    return average_score
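

# Leaderboard data: each base model alongside its 8-bit quantized ("-q8") variant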
DATA = {
    "Model": MODELS,
    "Model Size (MB)": [
        get_model_size(model) for model in MODELS
    ],
    "Score": [
        compute_model_score(model) for model in MODELS
    ],
    "q8 Model Size (MB)": [
        get_model_size(model + "-q8") for model in MODELS
    ],
    "q8 Score": [
        compute_model_score(model + "-q8") for model in MODELS
    ],
}

with open('data.json', 'w') as json_file:
    json.dump(DATA, json_file)
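
# data.json ends up as a dict of parallel lists, one entry per model (shape only, values illustrative):
# {"Model": [...], "Model Size (MB)": [...], "Score": [...], "q8 Model Size (MB)": [...], "q8 Score": [...]}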