sofianhw committed on
Commit
f7fd884
1 Parent(s): 2695082
Files changed (2) hide show
  1. Dockerfile +31 -37
  2. entrypoint.sh +11 -0
Dockerfile CHANGED
@@ -1,47 +1,41 @@
1
- ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
2
- FROM nvidia/cuda:${CUDA_IMAGE}
3
-
4
- # We need to set the host to 0.0.0.0 to allow outside access
5
- ENV HOST 0.0.0.0
6
-
7
- RUN apt-get update && apt-get upgrade -y \
8
- && apt-get install -y git build-essential \
9
- python3 python3-pip gcc wget \
10
- ocl-icd-opencl-dev opencl-headers clinfo \
11
- libclblast-dev libopenblas-dev \
12
- && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
13
-
14
- COPY . .
15
-
16
- # setting build related env vars
17
- ENV CUDA_DOCKER_ARCH=all
18
- ENV LLAMA_CUBLAS=1
19
-
20
- # Install depencencies
21
- RUN python3 -m pip install --upgrade pip pytest cmake \
22
- scikit-build setuptools fastapi uvicorn sse-starlette \
23
- pydantic-settings huggingface_hub hf_transfer
24
-
25
- # Install llama-cpp-python (build with cuda)
26
- RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
27
 
 
28
  RUN useradd -m -u 1000 user
 
29
  # Switch to the "user" user
30
  USER user
 
31
  # Set home to the user's home directory
32
  ENV HOME=/home/user \
33
- PATH=/home/user/.local/bin:$PATH \
34
- PYTHONPATH=$HOME/app \
35
- PYTHONUNBUFFERED=1 \
36
- GRADIO_ALLOW_FLAGGING=never \
37
- GRADIO_NUM_PORTS=1 \
38
- GRADIO_SERVER_NAME=0.0.0.0 \
39
- GRADIO_THEME=huggingface \
40
- SYSTEM=spaces
41
 
 
42
  WORKDIR $HOME/app
43
 
44
- # Copy the current directory contents into the container at $HOME/app setting the owner to the user
45
- COPY --chown=user . $HOME/app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM --platform=amd64 nvcr.io/nvidia/cuda:12.1.0-devel-ubuntu22.04 as base
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ # Set up a new user named "user" with user ID 1000
4
  RUN useradd -m -u 1000 user
5
+
6
  # Switch to the "user" user
7
  USER user
8
+
9
  # Set home to the user's home directory
10
  ENV HOME=/home/user \
11
+ PATH=/home/user/.local/bin:$PATH
 
 
 
 
 
 
 
12
 
13
+ # Set the working directory to the user's home directory
14
  WORKDIR $HOME/app
15
 
16
+ RUN apt update && \
17
+ apt install -y python3-pip python3-packaging \
18
+ git ninja-build && \
19
+ pip3 install -U pip
20
+
21
+ # Tweak this list to reduce build time
22
+ # https://developer.nvidia.com/cuda-gpus
23
+ ENV TORCH_CUDA_ARCH_LIST "7.0;7.2;7.5;8.0;8.6;8.9;9.0"
24
+
25
+ RUN pip3 install "torch==2.1.1"
26
+
27
+ # This build is slow but NVIDIA does not provide binaries. Increase MAX_JOBS as needed.
28
+ RUN pip3 install "git+https://github.com/stanford-futuredata/megablocks.git"
29
+ RUN pip3 install "git+https://github.com/vllm-project/vllm.git"
30
+ RUN pip3 install "xformers==0.0.23" "transformers==4.36.0" "fschat[model_worker]==0.2.34"
31
+
32
+ RUN git clone https://github.com/NVIDIA/apex && \
33
+ cd apex && git checkout 2386a912164b0c5cfcd8be7a2b890fbac5607c82 && \
34
+ sed -i '/check_cuda_torch_binary_vs_bare_metal(CUDA_HOME)/d' setup.py && \
35
+ python3 setup.py install --cpp_ext --cuda_ext
36
+
37
+ COPY entrypoint.sh .
38
+
39
+ RUN chmod +x $HOME/app/entrypoint.sh
40
 
41
+ ENTRYPOINT ["$HOME/app/entrypoint.sh"]
entrypoint.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Container entrypoint: optionally log in to Hugging Face, then start the
# vLLM OpenAI-compatible API server for the model named in HF_MODEL.
set -euo pipefail

# Log in only when a token was provided (needed for gated/private models).
if [[ -n "${HF_TOKEN:-}" ]]; then
    echo "The HF_TOKEN environment variable is set, logging to Hugging Face."
    python3 -c "import huggingface_hub; huggingface_hub.login('${HF_TOKEN}')"
else
    echo "The HF_TOKEN environment variable is not set or empty, not logging to Hugging Face."
fi

# Start the API server. The model is passed via --model (the module takes no
# positional model argument). `exec` replaces the shell so the server runs as
# PID 1 and receives SIGTERM directly on container stop.
exec python3 -u -m vllm.entrypoints.openai.api_server \
    --model "${HF_MODEL:?HF_MODEL must be set to a Hugging Face model id}" \
    --host "0.0.0.0" --port 7860