davanstrien HF staff committed on
Commit
5abd884
·
verified ·
0 Parent(s):
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. README.md +12 -0
  3. app.py +193 -0
  4. requirements.in +3 -0
  5. requirements.txt +224 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Argilla Progress
3
+ emoji: 🚀
4
+ colorFrom: purple
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import datetime
3
+ import pandas as pd
4
+ import gradio as gr
5
+ import argilla as rg
6
+ import plotly.graph_objects as go
7
+ import plotly.colors as colors
8
+
9
# Shared Argilla client; the server URL and API key are read from the
# environment when the module is imported.
client = rg.Argilla(
    api_url=os.environ.get("ARGILLA_API_URL"),
    api_key=os.environ.get("ARGILLA_API_KEY"),
)
12
+
13
+
14
def fetch_data(dataset_name: str, workspace: str):
    """Look up a dataset through the module-level Argilla ``client``.

    Args:
        dataset_name: Name of the dataset to retrieve.
        workspace: Workspace in which the dataset is looked up.

    Returns:
        Whatever ``client.datasets(...)`` returns for that name/workspace pair.
    """
    dataset = client.datasets(dataset_name, workspace=workspace)
    return dataset
16
+
17
+
18
def get_progress(dataset) -> dict:
    """Summarise how much of ``dataset`` has been annotated.

    Args:
        dataset: Object exposing an iterable ``records`` attribute whose items
            have a ``status`` attribute.

    Returns:
        A dict with ``"total"`` (number of records), ``"annotated"`` (records
        whose status equals ``"completed"``) and ``"progress"`` (percentage of
        annotated records; ``0`` when there are no records).
    """
    total_records = 0
    annotated_records = 0
    # Single pass: count while iterating instead of materialising every record
    # (plus a second list of statuses) just to take their lengths.
    for record in dataset.records:
        total_records += 1
        if record.status == "completed":
            annotated_records += 1

    # Guard against division by zero for an empty dataset.
    progress = (annotated_records / total_records) * 100 if total_records > 0 else 0
    return {
        "total": total_records,
        "annotated": annotated_records,
        "progress": progress,
    }
30
+
31
+
32
def get_leaderboard(dataset) -> dict:
    """Count responses per user across all records of ``dataset``.

    Args:
        dataset: Object exposing an iterable ``records`` attribute; each record
            has a ``responses`` iterable whose items carry a ``user_id``.

    Returns:
        A dict mapping a username to the number of responses from that user.
        When the user lookup returns nothing, the stringified user id is used
        as the key instead.
    """
    # user_id -> display name, so each distinct user is resolved through the
    # client only once instead of once per response.
    usernames = {}
    user_annotations = {}
    for record in dataset.records:
        for response in record.responses:
            user_id = response.user_id
            if user_id not in usernames:
                retrieved_user = client.users(id=user_id)
                # Defensive: fall back to the raw id if the lookup yields no
                # user object, rather than failing on ``.username``.
                usernames[user_id] = (
                    retrieved_user.username
                    if retrieved_user is not None
                    else str(user_id)
                )
            user = usernames[user_id]
            user_annotations[user] = user_annotations.get(user, 0) + 1
    return user_annotations
44
+
45
+
46
def create_gauge_chart(progress):
    """Build the Plotly gauge figure for overall annotation progress.

    Args:
        progress: Mapping with the keys ``"total"``, ``"annotated"`` and
            ``"progress"`` (a percentage value).

    Returns:
        A ``go.Figure`` holding one ``go.Indicator`` gauge and two text
        annotations (a record summary and a headline).
    """
    percent = progress["progress"]
    total = progress["total"]
    annotated = progress["annotated"]

    gauge_config = {
        "axis": {"range": [None, 100], "tickwidth": 1, "tickcolor": "darkblue"},
        "bar": {"color": "deepskyblue"},
        "bgcolor": "white",
        "borderwidth": 2,
        "bordercolor": "gray",
        # Two bands: the completed share and the remaining share.
        "steps": [
            {"range": [0, percent], "color": "royalblue"},
            {"range": [percent, 100], "color": "lightgray"},
        ],
        # Marker line at the 100% target.
        "threshold": {
            "line": {"color": "red", "width": 4},
            "thickness": 0.75,
            "value": 100,
        },
    }
    indicator = go.Indicator(
        mode="gauge+number+delta",
        value=percent,
        title={"text": "Dataset Annotation Progress", "font": {"size": 24}},
        delta={"reference": 100, "increasing": {"color": "RebeccaPurple"}},
        number={"font": {"size": 40}, "valueformat": ".1f", "suffix": "%"},
        gauge=gauge_config,
    )
    fig = go.Figure(indicator)

    summary_text = (
        f"Total records: {total}<br>"
        f"Annotated: {annotated} ({percent:.1f}%)<br>"
        f"Remaining: {total - annotated} ({100 - percent:.1f}%)"
    )
    # No explicit x/y here: the summary's position is left to Plotly.
    summary_annotation = {
        "text": summary_text,
        "showarrow": False,
        "xref": "paper",
        "yref": "paper",
        "font": {"size": 16},
    }
    fig.update_layout(annotations=[summary_annotation])

    headline_text = (
        f"Current Progress: {percent:.1f}% complete<br>"
        f"({annotated} out of {total} records annotated)"
    )
    fig.add_annotation(
        text=headline_text,
        xref="paper",
        yref="paper",
        x=0.5,
        y=1.1,
        showarrow=False,
        font={"size": 18},
        align="center",
    )

    return fig
106
+
107
+
108
def create_treemap(user_annotations, total_records):
    """Build a Plotly treemap of per-user annotation counts.

    Args:
        user_annotations: Mapping of username to that user's annotation count.
        total_records: Number of records in the dataset; used as the root
            node's value and as the denominator for each user's percentage.

    Returns:
        A ``go.Figure`` containing a single ``go.Treemap`` trace with one leaf
        per user under an ``"Annotations"`` root node.
    """
    # Largest contributors first so they receive the first colours.
    sorted_users = sorted(user_annotations.items(), key=lambda x: x[1], reverse=True)
    color_scale = colors.qualitative.Pastel + colors.qualitative.Set3

    labels, parents, values, text, user_colors = [], [], [], [], []

    for i, (user, contribution) in enumerate(sorted_users):
        # Avoid ZeroDivisionError when the dataset has no records.
        percentage = (contribution / total_records) * 100 if total_records else 0.0
        labels.append(user)
        parents.append("Annotations")
        values.append(contribution)
        text.append(f"{contribution} annotations<br>{percentage:.2f}%")
        # Cycle through the palette if there are more users than colours.
        user_colors.append(color_scale[i % len(color_scale)])

    # Root node that every user leaf hangs under.
    # NOTE(review): the root's value is the record count while the leaves are
    # per-user annotation counts; confirm this is the intended sizing under
    # Plotly's ``branchvalues`` setting.
    labels.append("Annotations")
    parents.append("")
    values.append(total_records)
    text.append(f"Total: {total_records} annotations")
    user_colors.append("#FFFFFF")

    fig = go.Figure(
        go.Treemap(
            labels=labels,
            parents=parents,
            values=values,
            text=text,
            textinfo="label+text",
            hoverinfo="label+text+value",
            marker=dict(colors=user_colors, line=dict(width=2)),
        )
    )

    fig.update_layout(
        title_text="User contributions to the total end dataset",
        height=500,
        margin=dict(l=10, r=10, t=50, b=10),
        paper_bgcolor="#F0F0F0",  # Light gray background
        plot_bgcolor="#F0F0F0",  # Light gray background
    )

    return fig
149
+
150
+
151
def update_dashboard():
    """Fetch the configured dataset and rebuild every dashboard output.

    The dataset name and workspace are read from the ``DATASET_NAME`` and
    ``WORKSPACE`` environment variables.

    Returns:
        A ``(gauge_chart, treemap, leaderboard_df)`` tuple. The data frame has
        the columns ``"User"`` and ``"Annotations"``, sorted by annotation
        count in descending order with a freshly reset index.
    """
    dataset_name = os.getenv("DATASET_NAME")
    workspace = os.getenv("WORKSPACE")
    dataset = fetch_data(dataset_name, workspace)

    progress = get_progress(dataset)
    user_annotations = get_leaderboard(dataset)

    gauge_chart = create_gauge_chart(progress)
    treemap = create_treemap(user_annotations, progress["total"])

    leaderboard_rows = list(user_annotations.items())
    leaderboard_df = (
        pd.DataFrame(leaderboard_rows, columns=["User", "Annotations"])
        .sort_values("Annotations", ascending=False)
        .reset_index(drop=True)
    )

    return gauge_chart, treemap, leaderboard_df
167
+
168
+
169
# Dashboard layout: two plots side by side, the leaderboard table below them,
# and a button that re-runs the same update as the initial page load.
with gr.Blocks() as demo:
    gr.Markdown("# Argilla Dataset Dashboard")

    with gr.Row():
        gauge_output = gr.Plot(label="Overall Progress")
        treemap_output = gr.Plot(label="User contributions")

    with gr.Row():
        leaderboard_output = gr.Dataframe(
            label="Leaderboard",
            headers=["User", "Annotations"],
        )

    # Both events fill the same three components, in this order.
    dashboard_outputs = [gauge_output, treemap_output, leaderboard_output]

    # Register the update on the Blocks load event.
    demo.load(update_dashboard, inputs=None, outputs=dashboard_outputs)

    refresh_button = gr.Button("Refresh")
    refresh_button.click(update_dashboard, inputs=None, outputs=dashboard_outputs)

if __name__ == "__main__":
    demo.launch()
requirements.in ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ argilla
3
+ plotly
requirements.txt ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile requirements.in -o requirements.txt
3
+ aiofiles==23.2.1
4
+ # via gradio
5
+ aiohappyeyeballs==2.4.0
6
+ # via aiohttp
7
+ aiohttp==3.10.5
8
+ # via
9
+ # datasets
10
+ # fsspec
11
+ aiosignal==1.3.1
12
+ # via aiohttp
13
+ annotated-types==0.7.0
14
+ # via pydantic
15
+ anyio==4.4.0
16
+ # via
17
+ # gradio
18
+ # httpx
19
+ # starlette
20
+ argilla==2.0.1
21
+ # via -r requirements.in
22
+ attrs==24.2.0
23
+ # via aiohttp
24
+ certifi==2024.7.4
25
+ # via
26
+ # httpcore
27
+ # httpx
28
+ # requests
29
+ charset-normalizer==3.3.2
30
+ # via requests
31
+ click==8.1.7
32
+ # via
33
+ # typer
34
+ # uvicorn
35
+ contourpy==1.3.0
36
+ # via matplotlib
37
+ cycler==0.12.1
38
+ # via matplotlib
39
+ datasets==2.21.0
40
+ # via argilla
41
+ dill==0.3.8
42
+ # via
43
+ # datasets
44
+ # multiprocess
45
+ fastapi==0.112.2
46
+ # via gradio
47
+ ffmpy==0.4.0
48
+ # via gradio
49
+ filelock==3.15.4
50
+ # via
51
+ # datasets
52
+ # huggingface-hub
53
+ fonttools==4.53.1
54
+ # via matplotlib
55
+ frozenlist==1.4.1
56
+ # via
57
+ # aiohttp
58
+ # aiosignal
59
+ fsspec==2024.6.1
60
+ # via
61
+ # datasets
62
+ # gradio-client
63
+ # huggingface-hub
64
+ gradio==4.42.0
65
+ # via -r requirements.in
66
+ gradio-client==1.3.0
67
+ # via gradio
68
+ h11==0.14.0
69
+ # via
70
+ # httpcore
71
+ # uvicorn
72
+ httpcore==1.0.5
73
+ # via httpx
74
+ httpx==0.27.2
75
+ # via
76
+ # argilla
77
+ # gradio
78
+ # gradio-client
79
+ huggingface-hub==0.24.6
80
+ # via
81
+ # argilla
82
+ # datasets
83
+ # gradio
84
+ # gradio-client
85
+ idna==3.8
86
+ # via
87
+ # anyio
88
+ # httpx
89
+ # requests
90
+ # yarl
91
+ importlib-resources==6.4.4
92
+ # via gradio
93
+ jinja2==3.1.4
94
+ # via gradio
95
+ kiwisolver==1.4.5
96
+ # via matplotlib
97
+ markdown-it-py==3.0.0
98
+ # via rich
99
+ markupsafe==2.1.5
100
+ # via
101
+ # gradio
102
+ # jinja2
103
+ matplotlib==3.9.2
104
+ # via gradio
105
+ mdurl==0.1.2
106
+ # via markdown-it-py
107
+ multidict==6.0.5
108
+ # via
109
+ # aiohttp
110
+ # yarl
111
+ multiprocess==0.70.16
112
+ # via datasets
113
+ numpy==2.1.0
114
+ # via
115
+ # contourpy
116
+ # datasets
117
+ # gradio
118
+ # matplotlib
119
+ # pandas
120
+ # pyarrow
121
+ orjson==3.10.7
122
+ # via gradio
123
+ packaging==24.1
124
+ # via
125
+ # datasets
126
+ # gradio
127
+ # gradio-client
128
+ # huggingface-hub
129
+ # matplotlib
130
+ # plotly
131
+ pandas==2.2.2
132
+ # via
133
+ # datasets
134
+ # gradio
135
+ pillow==10.4.0
136
+ # via
137
+ # gradio
138
+ # matplotlib
139
+ plotly==5.23.0
140
+ # via -r requirements.in
141
+ pyarrow==17.0.0
142
+ # via datasets
143
+ pydantic==2.8.2
144
+ # via
145
+ # argilla
146
+ # fastapi
147
+ # gradio
148
+ pydantic-core==2.20.1
149
+ # via pydantic
150
+ pydub==0.25.1
151
+ # via gradio
152
+ pygments==2.18.0
153
+ # via rich
154
+ pyparsing==3.1.4
155
+ # via matplotlib
156
+ python-dateutil==2.9.0.post0
157
+ # via
158
+ # matplotlib
159
+ # pandas
160
+ python-multipart==0.0.9
161
+ # via gradio
162
+ pytz==2024.1
163
+ # via pandas
164
+ pyyaml==6.0.2
165
+ # via
166
+ # datasets
167
+ # gradio
168
+ # huggingface-hub
169
+ requests==2.32.3
170
+ # via
171
+ # datasets
172
+ # huggingface-hub
173
+ rich==13.8.0
174
+ # via
175
+ # argilla
176
+ # typer
177
+ ruff==0.6.3
178
+ # via gradio
179
+ semantic-version==2.10.0
180
+ # via gradio
181
+ shellingham==1.5.4
182
+ # via typer
183
+ six==1.16.0
184
+ # via python-dateutil
185
+ sniffio==1.3.1
186
+ # via
187
+ # anyio
188
+ # httpx
189
+ starlette==0.38.2
190
+ # via fastapi
191
+ tenacity==9.0.0
192
+ # via plotly
193
+ tomlkit==0.12.0
194
+ # via gradio
195
+ tqdm==4.66.5
196
+ # via
197
+ # argilla
198
+ # datasets
199
+ # huggingface-hub
200
+ typer==0.12.5
201
+ # via gradio
202
+ typing-extensions==4.12.2
203
+ # via
204
+ # fastapi
205
+ # gradio
206
+ # gradio-client
207
+ # huggingface-hub
208
+ # pydantic
209
+ # pydantic-core
210
+ # typer
211
+ tzdata==2024.1
212
+ # via pandas
213
+ urllib3==2.2.2
214
+ # via
215
+ # gradio
216
+ # requests
217
+ uvicorn==0.30.6
218
+ # via gradio
219
+ websockets==12.0
220
+ # via gradio-client
221
+ xxhash==3.5.0
222
+ # via datasets
223
+ yarl==1.9.4
224
+ # via aiohttp