Spaces:
Paused
Paused
Update Dockerfile
Browse files — Dockerfile: +9 −27
Dockerfile
CHANGED
@@ -2,40 +2,22 @@
|
|
2 |
# Build as `docker build . -t localgpt`, requires BuildKit.
|
3 |
# Run as `docker run -it --mount src="$HOME/.cache",target=/root/.cache,type=bind --gpus=all localgpt`, requires Nvidia container toolkit.
|
4 |
|
5 |
-
|
6 |
-
FROM nvidia/cuda:${CUDA_IMAGE}
|
7 |
-
|
8 |
-
RUN apt-get update && apt-get upgrade -y \
|
9 |
-
&& apt-get install -y git build-essential \
|
10 |
-
python3 python3-pip gcc wget \
|
11 |
-
ocl-icd-opencl-dev opencl-headers clinfo \
|
12 |
-
libclblast-dev libopenblas-dev \
|
13 |
-
&& mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.
|
14 |
-
|
15 |
-
RUN python3 -m pip install --upgrade pip pytest cmake \
|
16 |
-
scikit-build setuptools fastapi uvicorn sse-starlette \
|
17 |
-
pydantic-settings starlette-context gradio huggingface_hub hf_transfer
|
18 |
-
|
19 |
RUN apt-get update && apt-get install -y software-properties-common
|
20 |
-
RUN apt-get install -y g++-11
|
21 |
-
|
22 |
-
ENV CUDA_DOCKER_ARCH=all
|
23 |
-
ENV LLAMA_CUBLAS=1
|
24 |
-
|
25 |
# only copy what's needed at every step to optimize layer cache
|
26 |
COPY ./requirements.txt .
|
27 |
-
|
28 |
# use BuildKit cache mount to drastically reduce redownloading from pip on repeated builds
|
29 |
-
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install requirements.txt llama-cpp-python
|
30 |
-
|
31 |
COPY SOURCE_DOCUMENTS ./SOURCE_DOCUMENTS
|
32 |
-
|
33 |
COPY ingest.py constants.py ./
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
37 |
|
38 |
COPY . .
|
39 |
|
40 |
ENV device_type=cuda
|
41 |
-
CMD
|
|
|
2 |
# syntax=docker/dockerfile:1
# Build as `docker build . -t localgpt`, requires BuildKit.
# Run as `docker run -it --mount src="$HOME/.cache",target=/root/.cache,type=bind --gpus=all localgpt`, requires Nvidia container toolkit.

FROM nvidia/cuda:11.7.1-runtime-ubuntu22.04

# Fix: update+install were split across two RUN layers, which can install from a
# stale package index when the cache is partially reused. One layer, with
# --no-install-recommends to keep the image small, and the apt lists removed in
# the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      g++-11 \
      make \
      pip \
      python-is-python3 \
      python3 \
      software-properties-common \
    && rm -rf /var/lib/apt/lists/*

# Fix: the original had no WORKDIR, so requirements.txt and the app were copied
# into the image root. Use a conventional absolute app directory.
WORKDIR /app

# only copy what's needed at every step to optimize layer cache
COPY ./requirements.txt .
# use BuildKit cache mount to drastically reduce redownloading from pip on repeated builds
RUN --mount=type=cache,target=/root/.cache \
    CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 \
    pip install --timeout 100 -r requirements.txt llama-cpp-python==0.1.83

COPY SOURCE_DOCUMENTS ./SOURCE_DOCUMENTS
COPY ingest.py constants.py ./
# Docker BuildKit does not support GPU during *docker build* time right now, only during *docker run*.
# See <https://github.com/moby/buildkit/issues/1436>.
# If this changes in the future you can `docker build --build-arg device_type=cuda . -t localgpt` (+GPU argument to be determined).
ARG device_type=cpu
RUN --mount=type=cache,target=/root/.cache \
    python ingest.py --device_type $device_type

COPY . .

ENV device_type=cuda
# Fix: shell-form CMD leaves python as a child of /bin/sh, so it never receives
# SIGTERM from `docker stop`. Exec form with an explicit sh -c keeps the
# required $device_type expansion, and `exec` makes python PID 1.
CMD ["sh", "-c", "exec python run_localGPT.py --device_type $device_type"]