File size: 1,805 Bytes
f7fd884
 
 
 
 
 
 
 
 
b706786
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
061092b
 
0a67f63
 
061092b
 
 
 
 
 
 
 
 
 
 
 
b7d4623
 
 
 
c2ba1c4
 
 
 
061092b
 
 
 
690332d
 
b706786
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# Entrypoint: optionally log in to Hugging Face, assemble optional server
# flags from environment variables, then exec the API server.

if [[ -n "${HF_TOKEN}" ]]; then
    echo "The HF_TOKEN environment variable is set, logging to Hugging Face."
    # Read the token from the environment inside Python rather than
    # interpolating it into the -c source string: a token containing a quote
    # would break (or inject into) the Python code, and interpolation exposes
    # the secret on the command line (argv is visible via `ps`).
    python3 -c "import os, huggingface_hub; huggingface_hub.login(os.environ['HF_TOKEN'])"
else
    echo "The HF_TOKEN environment variable is not set or empty, not logging to Hugging Face."
fi

# Accumulated CLI flags for the server; seeded from EXTRA_ARGS when provided.
additional_args=${EXTRA_ARGS:-""}

# Quantized serving requires an explicit dtype alongside it; refuse to start
# with a half-specified configuration.
if [[ -n "${QUANTIZATION}" ]]; then
    if [[ -z "${DTYPE}" ]]; then
        echo "Missing required environment variable DTYPE when QUANTIZATION is set"
        exit 1
    fi
    additional_args="${additional_args} -q ${QUANTIZATION} --dtype ${DTYPE}"
fi

# Optional runtime tuning flags: each env var, when non-empty, is forwarded
# as the corresponding server option.
[[ -n "${GPU_MEMORY_UTILIZATION}" ]] && additional_args="${additional_args} --gpu-memory-utilization ${GPU_MEMORY_UTILIZATION}"

[[ -n "${MAX_MODEL_LEN}" ]] && additional_args="${additional_args} --max-model-len ${MAX_MODEL_LEN}"

[[ -n "${TENSOR_PARALLEL_SIZE}" ]] && additional_args="${additional_args} --tensor-parallel-size ${TENSOR_PARALLEL_SIZE}"

# Download directory for model weights.
# NOTE: the historical variable name "DOWNLAD_DIR" is misspelled. Accept the
# correctly spelled DOWNLOAD_DIR as well (preferring it when both are set),
# while keeping the old name working for backward compatibility.
download_dir="${DOWNLOAD_DIR:-${DOWNLAD_DIR}}"
if [[ -n "${download_dir}" ]]; then
    additional_args="${additional_args} --download-dir ${download_dir}"
fi

# ENFORCE_EAGER is a boolean toggle: any non-empty value enables the flag
# (its actual content is ignored).
[[ -n "${ENFORCE_EAGER}" ]] && additional_args="${additional_args} --enforce-eager"

# Optional serving/identity flags, forwarded verbatim when set.
[[ -n "${SERVED_MODEL_NAME}" ]] && additional_args="${additional_args} --served-model-name ${SERVED_MODEL_NAME}"

[[ -n "${SWAP_SPACE}" ]] && additional_args="${additional_args} --swap-space ${SWAP_SPACE}"

[[ -n "${CHAT_TEMPLATE}" ]] && additional_args="${additional_args} --chat-template ${CHAT_TEMPLATE}"

# Disabled local-cache lookup: would redirect HF_MODEL to a pre-downloaded
# snapshot under /data (HF hub cache layout: "models--org--name") instead of
# fetching from the Hub.
# PATH_MODEL="/data/models--${HF_MODEL/\//--}"
# if [ -d "$PATH_MODEL" ]; then
#   HF_MODEL=$PATH_MODEL
# fi

# Replace this shell with the server process (exec: no lingering wrapper,
# signals reach python3 directly). -u disables Python's stdout/stderr
# buffering so logs stream promptly. ${additional_args} is intentionally
# left unquoted so the accumulated flag string word-splits into separate
# arguments (this assumes no individual flag value contains spaces).
exec python3 -u api_server.py \
    --model "${HF_MODEL}" \
    --host 0.0.0.0 \
    --port 7860 \
    ${additional_args}