#!/bin/bash

# Activate the llama.cpp Python virtual environment
source ~/git/llama.cpp/.venv/bin/activate

# Paths to the llama.cpp tools
QUANTIZER=~/git/llama.cpp/build/bin/llama-quantize
SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split

# Detect logical CPU count for maximum quantization throughput (macOS)
THREADS=$(sysctl -n hw.logicalcpu)
echo "Detected $THREADS threads."

# Find the input file (an F16/f16 GGUF in the current directory)
INPUT_FILE=$(find . -maxdepth 1 -name "*[Ff]16.gguf" | head -n 1)

if [ -z "$INPUT_FILE" ]; then
    echo "Error: No F16 GGUF file found in the current directory."
    exit 1
fi

# Remove leading ./ for cleaner filenames
INPUT_FILE=${INPUT_FILE#./}

echo "Found input file: $INPUT_FILE"

# List of quantization types to produce.
# Note: some of the _L/_XL variants (e.g. Q3_K_XL, Q6_K_L) are community
# naming conventions and may not be accepted by a stock llama-quantize
# build; the failure branch below reports those per type.
TYPES=(
    "IQ3_M" "IQ3_XS" "IQ3_XXS"
    "IQ4_NL" "IQ4_XS"
    "Q3_K_L" "Q3_K_M" "Q3_K_S" "Q3_K_XL"
    "Q4_0" "Q4_1"
    "Q4_K_L" "Q4_K_M" "Q4_K_S"
    "Q5_K_L" "Q5_K_M" "Q5_K_S"
    "Q6_K" "Q6_K_L"
    "Q8_0"
)

echo "Starting batch quantization..."
echo "----------------------------------------"

for TYPE in "${TYPES[@]}"; do
    # Construct the output filename by replacing F16/f16 with the quant type.
    # Simple bash substitution covers both casings:
    OUTPUT_FILE="${INPUT_FILE/F16/$TYPE}"
    OUTPUT_FILE="${OUTPUT_FILE/f16/$TYPE}"

    # If no substitution happened (filename matches neither), append the type
    if [ "$OUTPUT_FILE" == "$INPUT_FILE" ]; then
        OUTPUT_FILE="${INPUT_FILE%.gguf}-$TYPE.gguf"
    fi

    echo "Quantizing to $TYPE..."

    "$QUANTIZER" "$INPUT_FILE" "$OUTPUT_FILE" "$TYPE" "$THREADS"
    EXIT_CODE=$?

    if [ $EXIT_CODE -eq 0 ]; then
        echo "✅ Successfully created $OUTPUT_FILE"

        # Split files larger than 40GB (40 * 1024^3 = 42949672960 bytes)
        LIMIT_BYTES=42949672960
        FILE_SIZE=$(stat -f%z "$OUTPUT_FILE")   # macOS stat syntax

        if [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
            echo "File size ($FILE_SIZE bytes) exceeds 40GB. Splitting into directory..."

            # Create a directory named after the file (minus the .gguf extension)
            DIR_NAME="${OUTPUT_FILE%.gguf}"
            mkdir -p "$DIR_NAME"

            echo "  Splitting '$OUTPUT_FILE' into '$DIR_NAME/'..."

            # Run the split from inside the new directory so the shards land there
            pushd "$DIR_NAME" > /dev/null

            # llama-gguf-split usage: flags first, then input, then output prefix
            "$SPLIT_TOOL" --split-max-size 40G "../$OUTPUT_FILE" "$(basename "$OUTPUT_FILE" .gguf)"
            SPLIT_EXIT=$?

            popd > /dev/null

            if [ $SPLIT_EXIT -eq 0 ]; then
                echo "✅ Split successful. Removing original large file."
                rm "$OUTPUT_FILE"
            else
                echo "❌ Splitting failed. Keeping original file."
            fi
        fi
    else
        echo "❌ Failed to create $OUTPUT_FILE (Error code: $EXIT_CODE)"
        echo "   (Note: '$TYPE' might not be a valid quantization type in this build of llama.cpp)"
    fi
    echo "----------------------------------------"
done

echo "Batch quantization complete."
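
# Usage note (a sketch, not part of the original workflow): shards written by
# llama-gguf-split are named like <prefix>-00001-of-00003.gguf. llama.cpp can
# load a split model directly when pointed at the first shard, and the shards
# can be merged back into a single file with the tool's --merge mode, e.g.:
#
#   llama-gguf-split --merge model-Q8_0-00001-of-00003.gguf model-Q8_0.gguf
#
# (The filenames above are illustrative; substitute the actual shard names
# produced by this script.)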