tvosch committed on
Commit
1023f27
·
1 Parent(s): ffa1281

hardcode mixed precision off for inference

Browse files
Files changed (2) hide show
  1. app.py +5 -11
  2. estimate_train_vram.py +1 -0
app.py CHANGED
@@ -150,8 +150,7 @@ def build_interface(estimate_vram_fn):
150
  return app
151
 
152
 
153
- def estimate_vram(gradio_params):
154
- print(gradio_params)
155
  model_config = ModelConfig(**filter_params_for_dataclass(ModelConfig, gradio_params))
156
  training_config = TrainingConfig(**filter_params_for_dataclass(TrainingConfig, gradio_params))
157
 
@@ -159,22 +158,17 @@ def estimate_vram(gradio_params):
159
  # Update model config
160
  if not gradio_params["repo_id"]:
161
  return "No model selected!"
162
- # If cache directory set, then download config
163
- # if gradio_params["cache_dir"]:
164
- # config = scrape_config_from_hub(gradio_params["repo_id"])
165
- # model_config.overwrite_with_hf_config(config)
166
- cache_dir="cache/"
167
  # By default, scrape config.json from hub
168
- #else:
169
  config = download_config_from_hub(gradio_params["repo_id"], cache_dir)# gradio_params["cache_dir"])
170
  model_config.overwrite_with_hf_config(config.to_dict())
171
 
172
  if training_config.train:
173
  total_vram_dict = training_vram_required(model_config, training_config)
174
- output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['gradients']}GB (gradients) + {total_vram_dict['optimizer']}GB (optimizer) + {total_vram_dict['activations']}GB activations"
175
  else: # inference
176
  total_vram_dict = inference_vram_required(model_config, training_config)
177
- output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['kv_cache']}GB (KV cache) + {total_vram_dict['activations']}GB activations"
178
  return output_str
179
 
180
  if __name__ == "__main__":
@@ -184,7 +178,7 @@ if __name__ == "__main__":
184
  # Launch gradio interface
185
  if not args.no_app:
186
  import gradio as gr
187
- estimate_vram_fn = partial(estimate_vram)
188
  interface = build_interface(estimate_vram_fn)
189
  interface.launch()
190
  # Command line interface
 
150
  return app
151
 
152
 
153
+ def estimate_vram(cache_dir, gradio_params):
 
154
  model_config = ModelConfig(**filter_params_for_dataclass(ModelConfig, gradio_params))
155
  training_config = TrainingConfig(**filter_params_for_dataclass(TrainingConfig, gradio_params))
156
 
 
158
  # Update model config
159
  if not gradio_params["repo_id"]:
160
  return "No model selected!"
161
+
 
 
 
 
162
  # By default, scrape config.json from hub
 
163
  config = download_config_from_hub(gradio_params["repo_id"], cache_dir)# gradio_params["cache_dir"])
164
  model_config.overwrite_with_hf_config(config.to_dict())
165
 
166
  if training_config.train:
167
  total_vram_dict = training_vram_required(model_config, training_config)
168
+ output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['gradients']}GB (gradients) + {total_vram_dict['optimizer']}GB (optimizer) + {total_vram_dict['activations']}GB (activations)"
169
  else: # inference
170
  total_vram_dict = inference_vram_required(model_config, training_config)
171
+ output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['kv_cache']}GB (KV cache) + {total_vram_dict['activations']}GB (activations)"
172
  return output_str
173
 
174
  if __name__ == "__main__":
 
178
  # Launch gradio interface
179
  if not args.no_app:
180
  import gradio as gr
181
+ estimate_vram_fn = partial(estimate_vram, args.cache_dir)
182
  interface = build_interface(estimate_vram_fn)
183
  interface.launch()
184
  # Command line interface
estimate_train_vram.py CHANGED
@@ -58,6 +58,7 @@ def training_vram_required(model_config, training_config):
58
 
59
 
60
  def inference_vram_required(model_config, training_config):
 
61
  # Total inference VRAM = model size + KV cache size + activations + additional overhead
62
  model_vram = model_memory(parameters=model_config.model_size,
63
  precision=model_config.precision,
 
58
 
59
 
60
  def inference_vram_required(model_config, training_config):
61
+ model_config.mixed_precision = False
62
  # Total inference VRAM = model size + KV cache size + activations + additional overhead
63
  model_vram = model_memory(parameters=model_config.model_size,
64
  precision=model_config.precision,