#!/bin/sh

# For mlock support: raise the locked-memory limit so the model can be pinned in RAM.
ulimit -l unlimited

# Start the llama.cpp server: load the GGUF model fully into RAM (--no-mmap) and
# lock it there (--mlock), with a 4096-token context, 8 threads, batch size 20,
# the embedding endpoint enabled, and listening on all interfaces at port 7860.
./llama.cpp/server -m /model/gguf-model.bin --mlock -c 4096 -t 8 --no-mmap --embedding --timeout 600 --batch-size 20 --port 7860 --host "0.0.0.0"
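
# Once the server is running, the embedding endpoint enabled above can be
# queried over HTTP. A minimal sketch (run from another shell; the exact JSON
# shape may vary between llama.cpp versions):
#
#   curl -s http://localhost:7860/embedding \
#     -H "Content-Type: application/json" \
#     -d '{"content": "Hello, world"}'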