Spaces:
Sleeping
Sleeping
Alan Liu
committed on
Commit
•
989cd20
1
Parent(s):
3698d0a
fix bug
Browse files
app.py
CHANGED
@@ -132,7 +132,7 @@ with col2:
|
|
132 |
with col3: # Prefilling
|
133 |
prefilling_operation_count = prefilling_operation(model_config, inference_config)
|
134 |
inference_info['inference_prefilling_time'] = prefilling_operation_count['total'] / (gpu_config['TFLOP']*10**12)
|
135 |
-
inference_info['inference_prefilling_throughput'] = inference_config['input_seq_length']/inference_info['inference_prefilling_time']
|
136 |
cached_parameter_count['kv_cache'] = 2 * (inference_config['batchsize'] * (model_config['hidden_size'] * model_config['num_hidden_layers'] * inference_config['input_seq_length']))
|
137 |
|
138 |
operation_items = {key: "{:,}".format(int(prefilling_operation_count[key])) for key in prefilling_operation_count if key not in subtotal_operations}
|
@@ -157,8 +157,8 @@ with col3: # Prefilling
|
|
157 |
with col4: # Prefilling
|
158 |
generation_operation_count = generation_operation(model_config, inference_config)
|
159 |
inference_info['inference_generation_time'] = generation_operation_count['total'] / (gpu_config['TFLOP']*10**12)
|
160 |
-
inference_info['inference_generation_throughput'] = inference_config['output_seq_length']/inference_info['inference_generation_time']
|
161 |
-
inference_info['inference_client_generation_throughput'] = inference_config['output_seq_length'] / (inference_info['inference_prefilling_time'] + inference_info['inference_generation_time'])
|
162 |
cached_parameter_count['kv_cache'] = 2 * (inference_config['batchsize'] * (model_config['hidden_size'] * model_config['num_hidden_layers'] * (inference_config['input_seq_length']+inference_config['output_seq_length'])))
|
163 |
|
164 |
operation_items = {key: "{:,}".format(int(generation_operation_count[key])) for key in generation_operation_count if key not in subtotal_operations}
|
|
|
132 |
with col3: # Prefilling
|
133 |
prefilling_operation_count = prefilling_operation(model_config, inference_config)
|
134 |
inference_info['inference_prefilling_time'] = prefilling_operation_count['total'] / (gpu_config['TFLOP']*10**12)
|
135 |
+
inference_info['inference_prefilling_throughput'] = inference_config['input_seq_length']*inference_config['batchsize']/inference_info['inference_prefilling_time']
|
136 |
cached_parameter_count['kv_cache'] = 2 * (inference_config['batchsize'] * (model_config['hidden_size'] * model_config['num_hidden_layers'] * inference_config['input_seq_length']))
|
137 |
|
138 |
operation_items = {key: "{:,}".format(int(prefilling_operation_count[key])) for key in prefilling_operation_count if key not in subtotal_operations}
|
|
|
157 |
with col4: # Prefilling
|
158 |
generation_operation_count = generation_operation(model_config, inference_config)
|
159 |
inference_info['inference_generation_time'] = generation_operation_count['total'] / (gpu_config['TFLOP']*10**12)
|
160 |
+
inference_info['inference_generation_throughput'] = inference_config['output_seq_length']*inference_config['batchsize']/inference_info['inference_generation_time']
|
161 |
+
inference_info['inference_client_generation_throughput'] = inference_config['output_seq_length']*inference_config['batchsize'] / (inference_info['inference_prefilling_time'] + inference_info['inference_generation_time'])
|
162 |
cached_parameter_count['kv_cache'] = 2 * (inference_config['batchsize'] * (model_config['hidden_size'] * model_config['num_hidden_layers'] * (inference_config['input_seq_length']+inference_config['output_seq_length'])))
|
163 |
|
164 |
operation_items = {key: "{:,}".format(int(generation_operation_count[key])) for key in generation_operation_count if key not in subtotal_operations}
|