Felix Marty committed on
Commit
6e19ff8
1 Parent(s): 0325bda

turn off aws instances

Files changed (2)
  1. app.py +6 -0
  2. defaults.py +12 -12
app.py CHANGED

@@ -80,6 +80,12 @@ with gr.Blocks() as demo:
         "## Speed up inference and support more workload with PyTorch's BetterTransformer 🤗"
     )
 
+    gr.Markdown(
+        """
+    **The two AWS instances powering this Space are offline (to save us the $$$). Feel free to reproduce using [this backend code](https://github.com/fxmarty/bettertransformer_demo). The example results are from an AWS EC2 g4dn.xlarge instance with a single NVIDIA T4 GPU.**
+    """
+    )
+
     gr.Markdown(
         """
     Let's try out [BetterTransformer](https://pytorch.org/blog/a-better-transformer-for-fast-transformer-encoder-inference/) + [TorchServe](https://pytorch.org/serve/)!
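For context on the banner added above: BetterTransformer can be enabled on a Transformers model through Optimum's `BetterTransformer.transform` API. A minimal sketch follows; it is not part of this commit (the actual serving code lives in the linked backend repo), and the model name is an illustrative choice.

# Minimal sketch of enabling BetterTransformer via Optimum.
# Not part of this commit; the checkpoint below is chosen for illustration only.
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from optimum.bettertransformer import BetterTransformer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Swap supported modules for their BetterTransformer (fastpath) equivalents.
bt_model = BetterTransformer.transform(model, keep_original_model=False)

inputs = tokenizer("BetterTransformer is fast!", return_tensors="pt")
outputs = bt_model(**inputs)
print(outputs.logits)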
defaults.py CHANGED

@@ -1,35 +1,35 @@
 defaults_vanilla_single = {
     "status": 200,
     "prediction": "Positive",
-    "inf_latency": 7.66,
+    "inf_latency": 6.25,
     "peak_gpu_memory": 2706.21,
-    "end_to_end_latency": 309.65,
+    "end_to_end_latency": 81.95,
 }
 
 defaults_bt_single = {
     "status": 200,
     "prediction": "Positive",
-    "inf_latency": 6.01,
+    "inf_latency": 4.96,
     "peak_gpu_memory": 2706.22,
-    "end_to_end_latency": 303.53,
+    "end_to_end_latency": 78.69,
 }
 
 defaults_vanilla_spam = {
-    "throughput": 28.04,
-    "mean_inference_latency": 24.43,
-    "mean_peak_gpu_memory": 2907.92,
+    "throughput": 184.58,
+    "mean_inference_latency": 32.2,
+    "mean_peak_gpu_memory": 3046.26,
     "mean_padding_ratio": 69.53,
     "mean_sequence_length": 128.0,
-    "effective_batch_size": 4.3,
+    "effective_batch_size": 8.0,
 }
 
 defaults_bt_spam = {
-    "throughput": 38.53,
-    "mean_inference_latency": 12.73,
-    "mean_peak_gpu_memory": 2761.64,
+    "throughput": 312.21,
+    "mean_inference_latency": 14.42,
+    "mean_peak_gpu_memory": 2798.78,
     "mean_padding_ratio": 69.53,
     "mean_sequence_length": 128.0,
-    "effective_batch_size": 4.7,
+    "effective_batch_size": 8.0,
 }
 
 BATCH_SIZE = 8 # fixed!
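With the AWS instances off, the Space presumably displays these hard-coded benchmark dictionaries instead of querying the TorchServe backend. A minimal sketch of such a fallback is below, assuming a hypothetical `query_backend` helper and `BACKEND_URL` endpoint (the real wiring is in app.py of this Space and the linked backend repo).

# Minimal sketch of falling back to the hard-coded defaults when the backend is
# unreachable. `query_backend` and `BACKEND_URL` are hypothetical names for
# illustration; the actual logic lives in app.py.
import requests

from defaults import defaults_vanilla_single

BACKEND_URL = "http://localhost:8080/predictions/my_tc"  # hypothetical endpoint

def query_backend(text: str) -> dict:
    try:
        response = requests.post(BACKEND_URL, data=text.encode("utf-8"), timeout=5)
        response.raise_for_status()
        return response.json()
    except requests.RequestException:
        # Instances are off: return the canned g4dn.xlarge (T4) results instead.
        return defaults_vanilla_single

print(query_backend("BetterTransformer is fast!"))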