import gradio as gr
import pandas as pd
import json
import os


TASKS_CLUSTERING = [
    "ArxivClusteringP2P",
    "ArxivClusteringS2S",
    "BiorxivClusteringP2P",
    "BiorxivClusteringS2S",
    "MedrxivClusteringP2P",
    "MedrxivClusteringS2S",
    "RedditClustering",
    "RedditClusteringP2P",
    "StackExchangeClustering",
    "StackExchangeClusteringP2P",
    "TwentyNewsgroupsClustering",
]

TASKS_PAIR_CLASSIFICATION = [
    "SprintDuplicateQuestions",
    "TwitterSemEval2015",
    "TwitterURLCorpus",
]


MODELS = [
    "all-MiniLM-L6-v2"
]


def get_model_size(model_name):
    # Size of the serialized weights on disk, reported in MB; models are
    # expected under models/<model_name>/pytorch_model.bin.
    return os.path.getsize(f"models/{model_name}/pytorch_model.bin") / (1024.0 * 1024.0)


def compute_model_score(model_name):
    # Average of each task's main metric (v_measure for clustering, max average
    # precision for pair classification), read from results/<model_name>/.
    results_dir = "results"
    model_dir = os.path.join(results_dir, model_name)

    scores = []

    # Get scores for clustering tasks
    for task in TASKS_CLUSTERING:
        task_file = os.path.join(model_dir, f"{task}.json")
        with open(task_file, 'r') as f:
            data = json.load(f)
            v_measure = data['test']['v_measure']
            scores.append(v_measure)

    # Get scores for pair classification tasks
    for task in TASKS_PAIR_CLASSIFICATION:
        task_file = os.path.join(model_dir, f"{task}.json")
        with open(task_file, 'r') as f:
            data = json.load(f)
            max_ap = data['test']['max']['ap']
            scores.append(max_ap)

    # Compute average score
    average_score = sum(scores) / len(scores)
    return average_score
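
# Illustrative note (not from the original file): the loops above assume
# per-task JSON results shaped roughly like the following; the file names
# mirror the task lists and the numeric values are made up.
#
#   results/<model_name>/RedditClustering.json         -> {"test": {"v_measure": 0.50}}
#   results/<model_name>/SprintDuplicateQuestions.json -> {"test": {"max": {"ap": 0.85}}}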


DATA = {
    "Model": MODELS,
    "Model Size (MB)": [get_model_size(model) for model in MODELS],
    "Score": [compute_model_score(model) for model in MODELS],
    "q8 Model Size (MB)": [get_model_size(model + "-q8") for model in MODELS],
    "q8 Score": [compute_model_score(model + "-q8") for model in MODELS],
}

# Persist the leaderboard data to disk.
with open('data.json', 'w') as json_file:
    json.dump(DATA, json_file)
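

# The gradio and pandas imports above are not used in the code shown here; a
# minimal sketch of how the leaderboard data could be rendered with them
# (an assumption, not the original app code):
df = pd.DataFrame(DATA)

with gr.Blocks() as demo:
    gr.Markdown("# Embedding model leaderboard")
    gr.Dataframe(df)

if __name__ == "__main__":
    demo.launch()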