File size: 2,466 Bytes
ffeea82
 
fd2274b
 
ffeea82
fd2274b
 
 
 
 
ffeea82
 
 
 
 
 
 
 
 
 
 
7deed59
 
 
 
 
 
ffeea82
ecb4c36
ffeea82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7deed59
fd2274b
ffeea82
 
7deed59
 
fd2274b
 
 
 
 
 
 
 
 
 
 
 
 
0a9d8ad
ffeea82
 
 
7deed59
ffeea82
8ddd7d5
0a9d8ad
 
 
fd2274b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from collections import OrderedDict

import gradio as gr
import plotly.graph_objects as go
from datasets import load_dataset, Dataset
from huggingface_hub import list_datasets

pipelines = [d.id[20:-21] for d in list_datasets(author='open-source-metrics') if 'checkpoint-downloads' in d.id]


def sum_with_none(iterator):
    return sum([v for v in iterator if v is not None])


def merge_columns(dataset: Dataset, max_number_of_columns: int):
    downloads = {col: sum_with_none(dataset[col]) for col in dataset.column_names if col != 'dates'}
    sorted_downloads = OrderedDict(sorted(downloads.items(), key=lambda x: x[1], reverse=True))

    to_merge = list(sorted_downloads.keys())[max_number_of_columns:]
    to_keep = list(sorted_downloads.keys())[:max_number_of_columns]

    if 'no_arch' in to_merge:
        to_merge.remove('no_arch')

    if 'no_arch' in to_keep:
        to_keep.remove('no_arch')

    dictionary = dataset.to_dict()
    dictionary['combined'] = dictionary.pop('no_arch')

    while len(to_merge):
        current = dictionary['combined']
        to_add = dictionary.pop(to_merge.pop(0))

        for i in range(len(current)):
            if current[i] is None:
                current[i] = 0

            if to_add[i] is None:
                to_add[i] = 0

            current[i] += to_add[i]

        dictionary['combined'] = current

    dataset = Dataset.from_dict(dictionary)

    return dataset


def plot(library: str, stacked: bool, number_of_architectures_to_show: int):
    dataset = load_dataset(f"open-source-metrics/{library}-checkpoint-downloads")['train']
    n_archs = len(dataset.column_names) - 1  # Remove dates

    if n_archs > number_of_architectures_to_show:
        dataset = merge_columns(dataset, number_of_architectures_to_show)

    dates = dataset['dates']
    axis = dataset.column_names
    axis.remove('dates')

    fig = go.Figure()
    for i in axis:
        fig.add_trace(
            go.Scatter(x=dates, y=dataset[i], mode='lines+markers', name=i, stackgroup='one' if stacked else None)
        )
    return fig


with gr.Blocks() as demo:
    inputs = [
        gr.Dropdown(pipelines),
        gr.Checkbox(label='Stacked'),
        gr.Slider(minimum=1, maximum=len(pipelines), value=3, step=1, label="Max number of architectures to show")
    ]
    submit = gr.Button('Submit')
    with gr.Row():
        outputs = [gr.Plot()]
    submit.click(fn=plot, inputs=inputs, outputs=outputs)

demo.launch()