Spaces:
Running
Running
Commit
•
a1f6c2e
1
Parent(s):
bd9edb7
update viz
Browse files- src/latency_score_memory.py +1 -1
- src/llm_perf.py +9 -8
src/latency_score_memory.py
CHANGED
@@ -15,7 +15,7 @@ SCORE_MEMORY_LATENCY_DATA = [
|
|
15 |
"Decode Throughput (tokens/s)",
|
16 |
"Allocated Memory (MB)",
|
17 |
"E2E Latency (s)",
|
18 |
-
"E2E Throughput (tokens/s)",
|
19 |
]
|
20 |
|
21 |
|
|
|
15 |
"Decode Throughput (tokens/s)",
|
16 |
"Allocated Memory (MB)",
|
17 |
"E2E Latency (s)",
|
18 |
+
# "E2E Throughput (tokens/s)",
|
19 |
]
|
20 |
|
21 |
|
src/llm_perf.py
CHANGED
@@ -12,22 +12,23 @@ COLUMNS_MAPPING = {
|
|
12 |
"Model": "Model π€",
|
13 |
"Arch": "Arch ποΈ",
|
14 |
"Size": "Params (B)",
|
15 |
-
|
16 |
-
# deployment settings
|
17 |
-
"backend.name": "Backend π",
|
18 |
-
"backend.torch_dtype": "DType π₯",
|
19 |
-
"optimization": "Optimization π οΈ",
|
20 |
-
"quantization": "Quantization ποΈ",
|
21 |
# primary measurements
|
22 |
"forward.latency(s)": "Prefill Latency (s)",
|
23 |
"decode.throughput(tokens/s)": "Decode Throughput (tokens/s)",
|
24 |
"generate.max_memory_allocated(MB)": "Allocated Memory (MB)",
|
25 |
"generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh)",
|
|
|
|
|
|
|
|
|
|
|
26 |
# additional measurements
|
|
|
27 |
"generate.latency(s)": "E2E Latency (s)",
|
28 |
"generate.throughput(tokens/s)": "E2E Throughput (tokens/s)",
|
29 |
-
"generate.max_memory_reserved(MB)": "Reserved Memory (MB)",
|
30 |
-
"generate.max_memory_used(MB)": "Used Memory (MB)",
|
31 |
}
|
32 |
SORTING_COLUMNS = [
|
33 |
"Open LLM Score (%)",
|
|
|
12 |
"Model": "Model π€",
|
13 |
"Arch": "Arch ποΈ",
|
14 |
"Size": "Params (B)",
|
15 |
+
|
|
|
|
|
|
|
|
|
|
|
16 |
# primary measurements
|
17 |
"forward.latency(s)": "Prefill Latency (s)",
|
18 |
"decode.throughput(tokens/s)": "Decode Throughput (tokens/s)",
|
19 |
"generate.max_memory_allocated(MB)": "Allocated Memory (MB)",
|
20 |
"generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh)",
|
21 |
+
# deployment settings
|
22 |
+
"backend.name": "Backend π",
|
23 |
+
"backend.torch_dtype": "DType π₯",
|
24 |
+
"optimization": "Optimization π οΈ",
|
25 |
+
"quantization": "Quantization ποΈ",
|
26 |
# additional measurements
|
27 |
+
"Score": "Open LLM Score (%)",
|
28 |
"generate.latency(s)": "E2E Latency (s)",
|
29 |
"generate.throughput(tokens/s)": "E2E Throughput (tokens/s)",
|
30 |
+
# "generate.max_memory_reserved(MB)": "Reserved Memory (MB)",
|
31 |
+
# "generate.max_memory_used(MB)": "Used Memory (MB)",
|
32 |
}
|
33 |
SORTING_COLUMNS = [
|
34 |
"Open LLM Score (%)",
|