#!/bin/sh

# For mlock support: raise the locked-memory limit so the model can be pinned in RAM.
ulimit -l unlimited

# Start the llama.cpp server: load the GGUF model fully into RAM (--no-mmap) and
# lock it there (--mlock), with a 4096-token context, 8 threads, batch size 20,
# the embedding endpoint enabled, and listening on all interfaces at port 7860.
./llama.cpp/server -m /model/gguf-model.bin --mlock -c 4096 -t 8 --no-mmap --embedding --timeout 600 --batch-size 20 --port 7860 --host "0.0.0.0"
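
# Once the server is running, the embedding endpoint enabled above can be
# queried over HTTP. A minimal sketch (run from another shell; the exact JSON
# shape may vary between llama.cpp versions):
#
#   curl -s http://localhost:7860/embedding \
#     -H "Content-Type: application/json" \
#     -d '{"content": "Hello, world"}'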