Felix Marty committed · Commit 6e19ff8 · Parent: 0325bda

turn off aws instances
Files changed:
- app.py (+6, -0)
- defaults.py (+12, -12)
app.py CHANGED

```diff
@@ -80,6 +80,12 @@ with gr.Blocks() as demo:
         "## Speed up inference and support more workload with PyTorch's BetterTransformer 🤗"
     )
 
+    gr.Markdown(
+        """
+**The two AWS instances powering this Space are offline (to save us the $$$). Feel free to reproduce using [this backend code](https://github.com/fxmarty/bettertransformer_demo). The example results are from an AWS EC2 g4dn.xlarge instance with a single NVIDIA T4 GPU.**
+"""
+    )
+
     gr.Markdown(
         """
 Let's try out [BetterTransformer](https://pytorch.org/blog/a-better-transformer-for-fast-transformer-encoder-inference/) + [TorchServe](https://pytorch.org/serve/)!
```
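The banner added above says the Space now serves pre-recorded results instead of live ones. For context, here is a minimal sketch of how a demo callback could fall back to the committed defaults when the TorchServe backend is unreachable; `BACKEND_URL`, `get_single_input_result`, and the request shape are illustrative assumptions, not the Space's actual code (the real backend lives in [fxmarty/bettertransformer_demo](https://github.com/fxmarty/bettertransformer_demo)):

```python
# Sketch only: fall back to the committed defaults when the backend is offline.
# The endpoint name and request shape below are assumptions.
import requests

from defaults import defaults_bt_single, defaults_vanilla_single

BACKEND_URL = "http://localhost:8080/predictions/my_model"  # hypothetical


def get_single_input_result(text: str, use_bettertransformer: bool) -> dict:
    """Query the TorchServe backend, or return the stored T4 example
    results if the AWS instances are turned off."""
    defaults = defaults_bt_single if use_bettertransformer else defaults_vanilla_single
    try:
        response = requests.post(BACKEND_URL, data=text.encode("utf-8"), timeout=2)
        response.raise_for_status()
        return response.json()
    except requests.RequestException:
        # Instances are offline: show the pre-recorded numbers instead.
        return defaults
```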
defaults.py CHANGED

```diff
@@ -1,35 +1,35 @@
 defaults_vanilla_single = {
     "status": 200,
     "prediction": "Positive",
-    "inf_latency":
+    "inf_latency": 6.25,
     "peak_gpu_memory": 2706.21,
-    "end_to_end_latency":
+    "end_to_end_latency": 81.95,
 }
 
 defaults_bt_single = {
     "status": 200,
     "prediction": "Positive",
-    "inf_latency":
+    "inf_latency": 4.96,
     "peak_gpu_memory": 2706.22,
-    "end_to_end_latency":
+    "end_to_end_latency": 78.69,
 }
 
 defaults_vanilla_spam = {
-    "throughput":
-    "mean_inference_latency":
-    "mean_peak_gpu_memory":
+    "throughput": 184.58,
+    "mean_inference_latency": 32.2,
+    "mean_peak_gpu_memory": 3046.26,
     "mean_padding_ratio": 69.53,
     "mean_sequence_length": 128.0,
-    "effective_batch_size":
+    "effective_batch_size": 8.0,
 }
 
 defaults_bt_spam = {
-    "throughput":
-    "mean_inference_latency":
-    "mean_peak_gpu_memory":
+    "throughput": 312.21,
+    "mean_inference_latency": 14.42,
+    "mean_peak_gpu_memory": 2798.78,
     "mean_padding_ratio": 69.53,
     "mean_sequence_length": 128.0,
-    "effective_batch_size":
+    "effective_batch_size": 8.0,
 }
 
 BATCH_SIZE = 8  # fixed!
```
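Read together, the new defaults make the Space's headline claim concrete: on the T4, BetterTransformer lifts batched throughput from 184.58 to 312.21 and cuts mean inference latency from 32.2 to 14.42. A quick, illustrative check of the implied gains (not part of the repo):

```python
from defaults import defaults_bt_spam, defaults_vanilla_spam

# Gains implied by the committed example numbers (batch of 8 on a T4):
speedup = defaults_bt_spam["throughput"] / defaults_vanilla_spam["throughput"]
latency_gain = (
    defaults_vanilla_spam["mean_inference_latency"]
    / defaults_bt_spam["mean_inference_latency"]
)
print(f"Throughput: {speedup:.2f}x")      # 312.21 / 184.58 ≈ 1.69x
print(f"Mean latency: {latency_gain:.2f}x")  # 32.2 / 14.42 ≈ 2.23x
```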