sofianhw committed on
Commit
f7fd884
1 Parent(s): 2695082
Files changed (2) hide show
  1. Dockerfile +31 -37
  2. entrypoint.sh +11 -0
Dockerfile CHANGED
@@ -1,47 +1,41 @@
1
- ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
2
- FROM nvidia/cuda:${CUDA_IMAGE}
3
-
4
- # We need to set the host to 0.0.0.0 to allow outside access
5
- ENV HOST 0.0.0.0
6
-
7
- RUN apt-get update && apt-get upgrade -y \
8
- && apt-get install -y git build-essential \
9
- python3 python3-pip gcc wget \
10
- ocl-icd-opencl-dev opencl-headers clinfo \
11
- libclblast-dev libopenblas-dev \
12
- && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
13
-
14
- COPY . .
15
-
16
- # setting build related env vars
17
- ENV CUDA_DOCKER_ARCH=all
18
- ENV LLAMA_CUBLAS=1
19
-
20
- # Install depencencies
21
- RUN python3 -m pip install --upgrade pip pytest cmake \
22
- scikit-build setuptools fastapi uvicorn sse-starlette \
23
- pydantic-settings huggingface_hub hf_transfer
24
-
25
- # Install llama-cpp-python (build with cuda)
26
- RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
27
 
 
28
  RUN useradd -m -u 1000 user
 
29
  # Switch to the "user" user
30
  USER user
 
31
  # Set home to the user's home directory
32
  ENV HOME=/home/user \
33
- PATH=/home/user/.local/bin:$PATH \
34
- PYTHONPATH=$HOME/app \
35
- PYTHONUNBUFFERED=1 \
36
- GRADIO_ALLOW_FLAGGING=never \
37
- GRADIO_NUM_PORTS=1 \
38
- GRADIO_SERVER_NAME=0.0.0.0 \
39
- GRADIO_THEME=huggingface \
40
- SYSTEM=spaces
41
 
 
42
  WORKDIR $HOME/app
43
 
44
- # Copy the current directory contents into the container at $HOME/app setting the owner to the user
45
- COPY --chown=user . $HOME/app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM --platform=amd64 nvcr.io/nvidia/cuda:12.1.0-devel-ubuntu22.04 as base
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ # Set up a new user named "user" with user ID 1000
4
  RUN useradd -m -u 1000 user
5
+
6
  # Switch to the "user" user
7
  USER user
8
+
9
  # Set home to the user's home directory
10
  ENV HOME=/home/user \
11
+ PATH=/home/user/.local/bin:$PATH
 
 
 
 
 
 
 
12
 
13
+ # Set the working directory to the user's home directory
14
  WORKDIR $HOME/app
15
 
16
+ RUN apt update && \
17
+ apt install -y python3-pip python3-packaging \
18
+ git ninja-build && \
19
+ pip3 install -U pip
20
+
21
+ # Tweak this list to reduce build time
22
+ # https://developer.nvidia.com/cuda-gpus
23
+ ENV TORCH_CUDA_ARCH_LIST "7.0;7.2;7.5;8.0;8.6;8.9;9.0"
24
+
25
+ RUN pip3 install "torch==2.1.1"
26
+
27
+ # This build is slow but NVIDIA does not provide binaries. Increase MAX_JOBS as needed.
28
+ RUN pip3 install "git+https://github.com/stanford-futuredata/megablocks.git"
29
+ RUN pip3 install "git+https://github.com/vllm-project/vllm.git"
30
+ RUN pip3 install "xformers==0.0.23" "transformers==4.36.0" "fschat[model_worker]==0.2.34"
31
+
32
+ RUN git clone https://github.com/NVIDIA/apex && \
33
+ cd apex && git checkout 2386a912164b0c5cfcd8be7a2b890fbac5607c82 && \
34
+ sed -i '/check_cuda_torch_binary_vs_bare_metal(CUDA_HOME)/d' setup.py && \
35
+ python3 setup.py install --cpp_ext --cuda_ext
36
+
37
+ COPY entrypoint.sh .
38
+
39
+ RUN chmod +x $HOME/app/entrypoint.sh
40
 
41
+ ENTRYPOINT ["$HOME/app/entrypoint.sh"]
entrypoint.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Container entrypoint: optionally log in to Hugging Face, then start the
# vLLM OpenAI-compatible API server for the model named in HF_MODEL.
set -euo pipefail

# Log in only when a token was provided (needed for gated/private models).
if [[ -n "${HF_TOKEN:-}" ]]; then
    echo "The HF_TOKEN environment variable is set, logging to Hugging Face."
    python3 -c "import huggingface_hub; huggingface_hub.login('${HF_TOKEN}')"
else
    echo "The HF_TOKEN environment variable is not set or empty, not logging to Hugging Face."
fi

# Start the API server. The model is passed via --model (the module takes no
# positional model argument). `exec` replaces the shell so the server runs as
# PID 1 and receives SIGTERM directly on container stop.
exec python3 -u -m vllm.entrypoints.openai.api_server \
    --model "${HF_MODEL:?HF_MODEL must be set to a Hugging Face model id}" \
    --host "0.0.0.0" --port 7860