Error loading model with vllm
#1
by ggeo - opened
I have downloaded the model, and when I try to start it with vLLM:

```
--max-model-len 1024 \
--max-num-seqs 10 \
--dtype bfloat16 \
--device=cuda \
--trust-remote-code \
--max-num-batched-tokens 24576 \
--gpu-memory-utilization 0.8 \
--host 0.0.0.0 \
--port 8080
```

it gives me:

```
ERROR 03-20 12:24:30 [engine.py:443]
Traceback (most recent call last):
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/engine/multiprocessing/engine.py", line 431, in run_mp_engine
engine = MQLLMEngine.from_vllm_config(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/engine/multiprocessing/engine.py", line 126, in from_vllm_config
return cls(
^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/engine/multiprocessing/engine.py", line 80, in __init__
self.engine = LLMEngine(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 280, in __init__
self.model_executor = executor_class(vllm_config=vllm_config, )
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/executor/executor_base.py", line 52, in __init__
self._init_executor()
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/executor/uniproc_executor.py", line 47, in _init_executor
self.collective_rpc("load_model")
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/utils.py", line 2216, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/worker/worker.py", line 183, in load_model
self.model_runner.load_model()
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/worker/model_runner.py", line 1113, in load_model
self.model = get_model(vllm_config=self.vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/model_loader/loader.py", line 426, in load_model
loaded_weights = model.load_weights(
^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/models/gemma3_mm.py", line 618, in load_weights
return loader.load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/models/utils.py", line 235, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/models/utils.py", line 196, in _load_module
yield from self._load_module(prefix,
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/models/utils.py", line 173, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/models/gemma3.py", line 528, in load_weights
return loader.load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/models/utils.py", line 235, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/models/utils.py", line 196, in _load_module
yield from self._load_module(prefix,
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/models/utils.py", line 173, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/models/gemma3.py", line 452, in load_weights
weight_loader(param, loaded_weight)
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py", line 1228, in weight_loader
assert param_data.shape == loaded_weight.shape
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError
Process SpawnProcess-1:
Traceback (most recent call last):
........
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError
Loading safetensors checkpoint shards: 0% Completed | 0/3 [00:00<?, ?it/s]
[rank0]:[W320 12:24:31.921018915 ProcessGroupNCCL.cpp:1496] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
Traceback (most recent call last):
....
File "/home/ggous/vllm/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 259, in build_async_engine_client_from_engine_args
raise RuntimeError(
RuntimeError: Engine process failed to start. See stack trace for the root cause.
```
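The final `AssertionError` is raised in vLLM's linear-layer weight loader: a tensor in the checkpoint has a different shape than the parameter vLLM allocated for it. That usually means the installed vLLM's Gemma 3 implementation doesn't match the checkpoint, for example because the vLLM version is too old to support this architecture. As a rough sanity check, you can print the tensor shapes in the downloaded shards yourself; the directory path below is a placeholder, not taken from this post:

```python
# Sketch: list every tensor name and shape in the downloaded safetensors
# shards, to compare against the shapes vLLM's Gemma 3 loader expects.
# The directory path is a placeholder -- point it at your local download.
from pathlib import Path

from safetensors import safe_open

model_dir = Path("/path/to/downloaded/model")  # hypothetical path
for shard in sorted(model_dir.glob("*.safetensors")):
    print(f"== {shard.name} ==")
    with safe_open(shard, framework="pt") as f:
        for name in f.keys():
            print(name, tuple(f.get_slice(name).get_shape()))
```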
Use the latest version of vLLM and try again. Try the standard model first, and then ours. If the standard one doesn't work either, the architecture might not be supported yet.
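As a rough sketch of that suggestion: after upgrading (`pip install -U vllm`), you could first smoke-test the standard checkpoint from Python. The model ID below is an assumption, not something named in this thread:

```python
# Minimal smoke test with the standard Gemma 3 checkpoint. If this fails
# with the same shape assertion, the architecture is probably not yet
# supported by the installed vLLM version.
# The model ID is an assumption, not taken from this thread.
from vllm import LLM

llm = LLM(model="google/gemma-3-4b-it", max_model_len=1024, dtype="bfloat16")
out = llm.generate("Hello, world!")
print(out[0].outputs[0].text)
```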