#!/bin/bash
#
# Batch-quantize an F16 GGUF model with llama.cpp.
#
# Picks the first regular file in the current directory whose name ends in
# "F16.gguf" or "f16.gguf", then runs llama-quantize on it once for every
# entry in TYPES. Any output file larger than LIMIT_BYTES is split into a
# directory of shards with llama-gguf-split and the unsplit file is removed.
#
# Usage: run from the directory that contains the F16 GGUF file.
# Expects llama.cpp to be built under ~/git/llama.cpp.
#
# Strict mode (set -e / set -u) is deliberately not enabled: a third-party
# venv activate script is sourced, and a failed quantization of one type
# must not stop the remaining types from being attempted.

readonly LLAMA_DIR=~/git/llama.cpp
readonly VENV_ACTIVATE="$LLAMA_DIR/.venv/bin/activate"
readonly QUANTIZER="$LLAMA_DIR/build/bin/llama-quantize"
readonly SPLIT_TOOL="$LLAMA_DIR/build/bin/llama-gguf-split"

# Outputs larger than this many bytes are split (40 * 1024^3).
# NOTE(review): the threshold is binary (GiB) while the split size is passed
# to llama-gguf-split as "40G"; confirm the tool interprets the suffix the
# way you expect.
readonly LIMIT_BYTES=42949672960
readonly SPLIT_MAX_SIZE=40G

# Quantization types to attempt, in order. Some names may not be accepted by
# every llama-quantize build; such failures are reported and skipped.
TYPES=(
  "IQ3_M"
  "IQ3_XS"
  "IQ3_XXS"
  "IQ4_NL"
  "IQ4_XS"
  "Q3_K_L"
  "Q3_K_M"
  "Q3_K_S"
  "Q3_K_XL"
  "Q4_0"
  "Q4_1"
  "Q4_K_L"
  "Q4_K_M"
  "Q4_K_S"
  "Q5_K_L"
  "Q5_K_M"
  "Q5_K_S"
  "Q6_K"
  "Q6_K_L"
  "Q8_0"
)

#######################################
# Print a message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print the number of logical CPUs, or nothing if it cannot be determined.
# Tries the macOS sysctl key first, then nproc and getconf.
# Outputs: CPU count on stdout
#######################################
detect_threads() {
  sysctl -n hw.logicalcpu 2>/dev/null \
    || nproc 2>/dev/null \
    || getconf _NPROCESSORS_ONLN 2>/dev/null
}

#######################################
# Print the first regular file in the current directory whose name ends in
# F16.gguf / f16.gguf (glob order, i.e. sorted).
# Outputs: file name on stdout
# Returns: 0 if a file was found, 1 otherwise
#######################################
find_input_file() {
  local candidate
  for candidate in *[Ff]16.gguf; do
    # An unmatched glob stays literal; -f filters that and directories out.
    if [[ -f $candidate ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

#######################################
# Derive the output file name for one quantization type by replacing the
# trailing precision tag (BF16/F16, any case) with the type name. If the
# name carries no such tag, "-TYPE" is appended before ".gguf" instead.
# Arguments: $1 - input file name, $2 - quantization type
# Outputs:   output file name on stdout
#######################################
derive_output_name() {
  local input=$1 type=$2
  local stem=${input%.gguf}

  case $stem in
    *[Bb][Ff]16) stem=${stem%[Bb][Ff]16} ;;
    *[Ff]16) stem=${stem%[Ff]16} ;;
    *) stem=${stem}- ;;
  esac
  printf '%s\n' "${stem}${type}.gguf"
}

#######################################
# Print the size of a file in bytes (BSD stat first, GNU stat as fallback).
# Arguments: $1 - file path
# Outputs:   size on stdout; nothing if both stat variants fail
#######################################
file_size_bytes() {
  stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null
}

#######################################
# Split a GGUF file into shards inside a sibling directory named after the
# file (without the .gguf extension).
# Globals:   SPLIT_TOOL, SPLIT_MAX_SIZE (read)
# Arguments: $1 - file name, relative to the current directory
# Returns:   0 if the split tool succeeded, non-zero otherwise
#######################################
split_output() {
  local file=$1
  local dir=${file%.gguf}
  local prefix=${dir##*/}

  mkdir -p -- "$dir" || return 1
  echo " Splitting '$file' into '$dir/'..."

  # Run inside the target directory so the shards land there; the subshell
  # keeps the caller's working directory untouched even if cd fails.
  (
    cd -- "$dir" || exit 1
    "$SPLIT_TOOL" --split-max-size "$SPLIT_MAX_SIZE" "../$file" "$prefix"
  )
}

#######################################
# Quantize INPUT_FILE to a single type and split the result if it is too big.
# Globals:   QUANTIZER, INPUT_FILE, THREADS, LIMIT_BYTES (read)
# Arguments: $1 - quantization type
# Returns:   0 on success, the quantizer's exit code on failure
#######################################
quantize_one() {
  local type=$1
  local output status size

  output=$(derive_output_name "$INPUT_FILE" "$type")

  echo "Quantizing to $type..."
  # The thread-count argument is omitted entirely when THREADS is empty.
  "$QUANTIZER" "$INPUT_FILE" "$output" "$type" ${THREADS:+"$THREADS"}
  status=$?

  if [[ $status -ne 0 ]]; then
    echo "❌ Failed to create $output (Error code: $status)" >&2
    echo " (Note: '$type' might not be a valid quantization type in this version of llama.cpp)" >&2
    return "$status"
  fi
  echo "✅ Successfully created $output"

  size=$(file_size_bytes "$output")
  if [[ -z $size ]]; then
    echo "Warning: could not determine the size of $output; skipping split check." >&2
    return 0
  fi

  if [[ $size -gt $LIMIT_BYTES ]]; then
    echo "File size ($size bytes) exceeds 40GB. Splitting into directory..."
    if split_output "$output"; then
      echo "✅ Split successful. Removing original large file."
      rm -- "$output"
    else
      echo "❌ Splitting failed. Keeping original file." >&2
    fi
  fi
  return 0
}

# Activate the llama.cpp Python virtualenv when present; a missing venv only
# produces a warning.
if [[ -f $VENV_ACTIVATE ]]; then
  # shellcheck disable=SC1090
  source "$VENV_ACTIVATE"
else
  echo "Warning: virtualenv not found at $VENV_ACTIVATE; continuing without it." >&2
fi

THREADS=$(detect_threads)
case $THREADS in
  '' | *[!0-9]*)
    echo "Warning: could not detect the CPU count; omitting the thread-count argument." >&2
    THREADS=
    ;;
  *)
    echo "Detected $THREADS threads."
    ;;
esac

INPUT_FILE=$(find_input_file) \
  || die "Error: No F16 GGUF file found in the current directory."
echo "Found input file: $INPUT_FILE"

# Fail once up front instead of reporting every type as "possibly invalid".
[[ -x $QUANTIZER ]] \
  || die "Error: llama-quantize not found or not executable at $QUANTIZER"

echo "Starting batch quantization..."
echo "----------------------------------------"

for TYPE in "${TYPES[@]}"; do
  quantize_one "$TYPE"
  echo "----------------------------------------"
done

echo "Batch quantization complete."