sofianhw committed on
Commit
24a7944
1 Parent(s): 5184fa9

lock vllm v0.4.3

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. api_server.py +1 -5
Dockerfile CHANGED
@@ -14,7 +14,7 @@ RUN pip3 install "torch==2.1.1"
14
  # This build is slow but NVIDIA does not provide binaries. Increase MAX_JOBS as needed.
15
  # RUN pip3 install "git+https://github.com/stanford-futuredata/megablocks.git"
16
  RUN pip3 install -U openai
17
- RUN pip3 install -U vllm
18
  RUN pip3 install -U pydantic
19
  RUN pip3 install -U aioprometheus
20
 
 
14
  # This build is slow but NVIDIA does not provide binaries. Increase MAX_JOBS as needed.
15
  # RUN pip3 install "git+https://github.com/stanford-futuredata/megablocks.git"
16
  RUN pip3 install -U openai
17
+ RUN pip3 install vllm==0.4.3
18
  RUN pip3 install -U pydantic
19
  RUN pip3 install -U aioprometheus
20
 
api_server.py CHANGED
@@ -29,7 +29,6 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
29
  from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
30
  from vllm.logger import init_logger
31
  from vllm.usage.usage_lib import UsageContext
32
- from vllm.utils import FlexibleArgumentParser
33
 
34
  TIMEOUT_KEEP_ALIVE = 5 # seconds
35
 
@@ -60,11 +59,8 @@ async def lifespan(app: fastapi.FastAPI):
60
 
61
  app = fastapi.FastAPI(lifespan=lifespan)
62
 
63
-
64
  def parse_args():
65
- parser_text = FlexibleArgumentParser(
66
- description="vLLM OpenAI-Compatible RESTful API server.")
67
- parser = make_arg_parser(parser_text)
68
  return parser.parse_args()
69
 
70
 
 
29
  from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
30
  from vllm.logger import init_logger
31
  from vllm.usage.usage_lib import UsageContext
 
32
 
33
  TIMEOUT_KEEP_ALIVE = 5 # seconds
34
 
 
59
 
60
  app = fastapi.FastAPI(lifespan=lifespan)
61
 
 
62
  def parse_args():
63
+ parser = make_arg_parser()
 
 
64
  return parser.parse_args()
65
 
66