# Ina-v11.1-gguf / quantize_all.sh
# (uploaded by ehartford via the upload-large-folder tool, commit 793fac3)
#!/bin/bash
# Batch-quantize an F16 GGUF model into multiple quantization formats.
# Expects a built llama.cpp checkout (with venv) at ~/git/llama.cpp and
# exactly one *F16.gguf / *f16.gguf file in the current directory.

# Source the llama.cpp Python environment.
source ~/git/llama.cpp/.venv/bin/activate

# Path to llama-quantize; fail fast if the build is missing.
QUANTIZER=~/git/llama.cpp/build/bin/llama-quantize
if [ ! -x "$QUANTIZER" ]; then
  echo "Error: llama-quantize not found or not executable at $QUANTIZER" >&2
  exit 1
fi

# Detect thread count for max performance: macOS sysctl first, then the
# Linux equivalent (nproc), then a conservative default of 4.
THREADS=$(sysctl -n hw.logicalcpu 2>/dev/null || nproc 2>/dev/null || echo 4)
echo "Detected $THREADS threads."

# Find the input file (looking for F16 or f16 in the name in the current directory)
INPUT_FILE=$(find . -maxdepth 1 -name "*[Ff]16.gguf" | head -n 1)
if [ -z "$INPUT_FILE" ]; then
  echo "Error: No F16 GGUF file found in the current directory." >&2
  exit 1
fi

# Remove the leading ./ that find prepends, for cleaner output filenames.
INPUT_FILE=${INPUT_FILE#./}
echo "Found input file: $INPUT_FILE"
# Quantization formats to produce, grouped by family/size tier.
TYPES=(
  IQ3_M  IQ3_XS IQ3_XXS
  IQ4_NL IQ4_XS
  Q3_K_L Q3_K_M Q3_K_S Q3_K_XL
  Q4_0   Q4_1
  Q4_K_L Q4_K_M Q4_K_S
  Q5_K_L Q5_K_M Q5_K_S
  Q6_K   Q6_K_L
  Q8_0
)
# Hugging Face single-file upload limit: 40 GiB = 40 * 1024^3 = 42949672960
# bytes (the old comment said 52949672960, which was wrong). Files larger
# than this are split into sharded GGUF directories.
LIMIT_BYTES=42949672960
# Split tool, built alongside llama-quantize.
SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split

echo "Starting batch quantization..."
echo "----------------------------------------"
for TYPE in "${TYPES[@]}"; do
  # Construct the output filename by replacing F16 or f16 with the quant type.
  OUTPUT_FILE="${INPUT_FILE/F16/$TYPE}"
  OUTPUT_FILE="${OUTPUT_FILE/f16/$TYPE}"
  # If neither substitution matched, append the type before the extension.
  if [ "$OUTPUT_FILE" == "$INPUT_FILE" ]; then
    OUTPUT_FILE="${INPUT_FILE%.gguf}-$TYPE.gguf"
  fi

  echo "Quantizing to $TYPE..."
  "$QUANTIZER" "$INPUT_FILE" "$OUTPUT_FILE" "$TYPE" "$THREADS"
  EXIT_CODE=$?

  if [ "$EXIT_CODE" -eq 0 ]; then
    echo "βœ… Successfully created $OUTPUT_FILE"

    # File size in bytes: BSD stat (-f%z, macOS) first, then GNU stat
    # (-c%s, Linux) so the script is portable across both.
    FILE_SIZE=$(stat -f%z "$OUTPUT_FILE" 2>/dev/null || stat -c%s "$OUTPUT_FILE")

    if [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
      echo "File size ($FILE_SIZE bytes) exceeds 40GB. Splitting into directory..."
      # Shards live in a directory named after the file (minus .gguf).
      DIR_NAME="${OUTPUT_FILE%.gguf}"
      mkdir -p "$DIR_NAME"

      echo " Splitting '$OUTPUT_FILE' into '$DIR_NAME/'..."
      # Run from inside the target directory so the shards land there.
      pushd "$DIR_NAME" > /dev/null
      # llama-gguf-split usage: flags first, then IN file, then OUT prefix.
      "$SPLIT_TOOL" --split-max-size 40G "../$OUTPUT_FILE" "$(basename "$OUTPUT_FILE" .gguf)"
      SPLIT_EXIT=$?
      popd > /dev/null

      if [ "$SPLIT_EXIT" -eq 0 ]; then
        echo "βœ… Split successful. Removing original large file."
        rm -- "$OUTPUT_FILE"
      else
        echo "❌ Splitting failed. Keeping original file."
      fi
    fi
  else
    # Non-zero exit is logged but does not abort the batch: some TYPES may
    # simply be unsupported by this llama.cpp build.
    echo "❌ Failed to create $OUTPUT_FILE (Error code: $EXIT_CODE)"
    echo " (Note: '$TYPE' might not be a valid quantization type in this version of llama.cpp)"
  fi
  echo "----------------------------------------"
done
echo "Batch quantization complete."