bitnet-Llama3-8B-1.58-100B-tokens-GGUF

Runtime error

Update Dockerfile

4848ae7 verified 2 months ago

1.02 kB

	# Base image is parameterised so that both stages use the same Ubuntu release.
	ARG UBUNTU_VERSION=22.04
	ARG BASE_CPU_CONTAINER=ubuntu:${UBUNTU_VERSION}

	# ---------------------------------------------------------------------------
	# Build stage: downloads the GGUF model and compiles a static llama-server.
	# Artifacts consumed by later stages:
	#   /build/llama.cpp/llama-server
	#   /data/model.gguf
	# ---------------------------------------------------------------------------
	FROM ${BASE_CPU_CONTAINER} AS build

	# Toolchain plus download tools. Recommended packages are skipped, so
	# ca-certificates is listed explicitly: the HTTPS clone and download need it.
	# The apt lists are removed in the same layer that created them.
	RUN apt-get update && \
	    apt-get install -y --no-install-recommends \
	        build-essential \
	        ca-certificates \
	        cmake \
	        git \
	        make \
	        wget && \
	    rm -rf /var/lib/apt/lists/*

	# Fetch the model before touching the llama.cpp sources so that this large
	# layer stays cached when only the llama.cpp revision changes.
	WORKDIR /data
	RUN wget -nv -O model.gguf https://huggingface.co/brunopio/Llama3-8B-1.58-100B-tokens-GGUF/resolve/main/Llama3-8B-1.58-100B-tokens-TQ2_0.gguf

	# Branch or tag of llama.cpp to build. The default keeps the previous
	# behaviour (tip of master); pass --build-arg LLAMA_CPP_REF=<tag> for a
	# reproducible build. Declared late so changing it does not invalidate the
	# layers above.
	# NOTE(review): upstream may no longer support the Makefile build on recent
	# revisions - confirm, and pin a revision that still does or switch to the
	# CMake commands kept below.
	ARG LLAMA_CPP_REF=master

	WORKDIR /build

	# Shallow clone: only the requested revision is needed to compile.
	# NOTE(review): any build metadata derived from git history may differ with a
	# shallow clone - confirm this is acceptable.
	RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" https://github.com/ggerganov/llama.cpp.git

	WORKDIR /build/llama.cpp

	# CPU-only build: no GPU backend switches (e.g. LLAMA_CUBLAS) are set.
	#ENV LLAMA_CUBLAS=0
	# Link statically so the binary can be copied into the runtime stage without
	# its shared-library dependencies.
	ENV LDFLAGS="-static"

	# Build only the server target, using every available core.
	RUN make -j"$(nproc)" llama-server
	# CMake alternative (not used):
	#RUN mkdir build && \
	# cd build && \
	# cmake .. && \
	# cmake --build . --config Release --target llama-server

	# ---------------------------------------------------------------------------
	# Runtime stage: carries only the server binary, the model and the launcher
	# script; none of the build toolchain is present here.
	# ---------------------------------------------------------------------------
	FROM ${BASE_CPU_CONTAINER} AS runtime

	# Provide an unprivileged account with UID 1000 so the service does not run as
	# root. The getent guard keeps this step working on base images that already
	# ship a user with that UID.
	RUN getent passwd 1000 >/dev/null || useradd --create-home --uid 1000 user

	WORKDIR /app

	# Copy the executable and the model from the build stage. The destination is
	# spelled out in full so it is unambiguous that a file, not a directory, is
	# being written.
	COPY --from=build /build/llama.cpp/llama-server /app/llama-server
	COPY --from=build /data/model.gguf /data/model.gguf

	# Launcher script from the build context, made executable as part of the copy
	# instead of in a separate chmod layer.
	COPY --chmod=755 ./run.sh /app/run.sh

	# Port the service is expected to listen on (documentation only; the actual
	# bind address is decided by run.sh).
	EXPOSE 7860

	# Drop privileges for everything that runs in the container.
	USER 1000

	# JSON-array form with an explicit exec: the shell is replaced by run.sh, so
	# run.sh receives stop signals directly. Going through sh keeps a script
	# without a shebang line working, as the previous shell-form CMD did.
	CMD ["/bin/sh", "-c", "exec ./run.sh"]