#!/bin/bash
#
# Container entrypoint: optionally logs in to Hugging Face, then replaces this
# shell with the vLLM OpenAI-compatible API server listening on 0.0.0.0:7860.
#
# Environment variables read:
#   HF_TOKEN                - if non-empty, passed to huggingface_hub.login().
#   HF_MODEL                - value for --model (passed through unvalidated).
#   EXTRA_ARGS              - extra server arguments, split on whitespace and
#                             placed before the arguments generated below.
#   QUANTIZATION            - if non-empty, adds "-q <value>"; requires DTYPE.
#   DTYPE                   - adds "--dtype <value>"; only read when
#                             QUANTIZATION is set.
#   GPU_MEMORY_UTILIZATION  - if non-empty, adds --gpu-memory-utilization.
#   MAX_MODEL_LEN           - if non-empty, adds --max-model-len.
#
# Exit status: 1 when QUANTIZATION is set without DTYPE; otherwise whatever
# the exec'd server process returns.

if [[ -n "${HF_TOKEN:-}" ]]; then
  echo "The HF_TOKEN environment variable set, logging to Hugging Face."
  # The token is read from the environment inside Python instead of being
  # interpolated into the source text: a quote in the token can no longer
  # break or inject code, and the secret stays out of the argument list.
  # A failed login is reported but does not stop the server from starting.
  HF_TOKEN="${HF_TOKEN}" python3 -c \
    'import os, huggingface_hub; huggingface_hub.login(os.environ["HF_TOKEN"])' \
    || echo "Warning: Hugging Face login failed, continuing without it." >&2
else
  echo "The HF_TOKEN environment variable is not set or empty, not logging to Hugging Face."
fi

# Collect server arguments in an array so each value stays a single word.
# EXTRA_ARGS is a plain string, so it is split on whitespace (including
# newlines) here; unlike an unquoted expansion, no glob expansion happens.
# read returns non-zero at end of input when -d '' is used, hence "|| true".
server_args=()
read -r -d '' -a server_args <<< "${EXTRA_ARGS:-}" || true

if [[ -n "${QUANTIZATION:-}" ]]; then
  if [[ -z "${DTYPE:-}" ]]; then
    echo "Missing required environment variable DTYPE when QUANTIZATION is set" >&2
    exit 1
  fi
  server_args+=(-q "${QUANTIZATION}" --dtype "${DTYPE}")
fi

if [[ -n "${GPU_MEMORY_UTILIZATION:-}" ]]; then
  server_args+=(--gpu-memory-utilization "${GPU_MEMORY_UTILIZATION}")
fi

if [[ -n "${MAX_MODEL_LEN:-}" ]]; then
  server_args+=(--max-model-len "${MAX_MODEL_LEN}")
fi

# Run the provided command. exec replaces the shell so the server receives
# signals directly; -u keeps Python output unbuffered.
exec python3 -u -m vllm.entrypoints.openai.api_server \
  --model "${HF_MODEL:-}" \
  --host 0.0.0.0 \
  --port 7860 \
  "${server_args[@]}"