hardcode mixed precision off for inference
- app.py +5 -11
- estimate_train_vram.py +1 -0
app.py
CHANGED
@@ -150,8 +150,7 @@ def build_interface(estimate_vram_fn):
     return app
 
 
-def estimate_vram(gradio_params):
-    print(gradio_params)
+def estimate_vram(cache_dir, gradio_params):
     model_config = ModelConfig(**filter_params_for_dataclass(ModelConfig, gradio_params))
     training_config = TrainingConfig(**filter_params_for_dataclass(TrainingConfig, gradio_params))
 
@@ -159,22 +158,17 @@ def estimate_vram(gradio_params):
     # Update model config
     if not gradio_params["repo_id"]:
         return "No model selected!"
-
-    # if gradio_params["cache_dir"]:
-    #     config = scrape_config_from_hub(gradio_params["repo_id"])
-    #     model_config.overwrite_with_hf_config(config)
-    cache_dir="cache/"
+
     # By default, scrape config.json from hub
-    #else:
     config = download_config_from_hub(gradio_params["repo_id"], cache_dir)# gradio_params["cache_dir"])
     model_config.overwrite_with_hf_config(config.to_dict())
 
     if training_config.train:
         total_vram_dict = training_vram_required(model_config, training_config)
-        output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['gradients']}GB (gradients) + {total_vram_dict['optimizer']}GB (optimizer) + {total_vram_dict['activations']}GB activations"
+        output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['gradients']}GB (gradients) + {total_vram_dict['optimizer']}GB (optimizer) + {total_vram_dict['activations']}GB (activations)"
     else: # inference
         total_vram_dict = inference_vram_required(model_config, training_config)
-        output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['kv_cache']}GB (KV cache) + {total_vram_dict['activations']}GB activations"
+        output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['kv_cache']}GB (KV cache) + {total_vram_dict['activations']}GB (activations)"
     return output_str
 
 if __name__ == "__main__":
@@ -184,7 +178,7 @@ if __name__ == "__main__":
     # Launch gradio interface
     if not args.no_app:
         import gradio as gr
-        estimate_vram_fn = partial(estimate_vram)
+        estimate_vram_fn = partial(estimate_vram, args.cache_dir)
         interface = build_interface(estimate_vram_fn)
         interface.launch()
     # Command line interface
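The app.py change drops the hardcoded cache_dir = "cache/" (along with the dead, commented-out scrape_config_from_hub branch) in favor of a cache_dir parameter that is bound once at startup via functools.partial, so the Gradio callback still receives only gradio_params. A minimal sketch of that binding pattern, with a stub body standing in for the app's real config download:

from functools import partial

def estimate_vram(cache_dir, gradio_params):
    # cache_dir is fixed at startup; Gradio supplies only gradio_params
    return f"config for {gradio_params['repo_id']} would be cached in {cache_dir}"

estimate_vram_fn = partial(estimate_vram, "cache/")  # stands in for args.cache_dir
print(estimate_vram_fn({"repo_id": "gpt2"}))         # config for gpt2 would be cached in cache/

Binding the directory this way keeps build_interface unchanged: it still receives a one-argument callable.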
estimate_train_vram.py
CHANGED
@@ -58,6 +58,7 @@ def training_vram_required(model_config, training_config):
 
 
 def inference_vram_required(model_config, training_config):
+    model_config.mixed_precision = False
     # Total inference VRAM = model size + KV cache size + activations + additional overhead
     model_vram = model_memory(parameters=model_config.model_size,
                               precision=model_config.precision,