#!/bin/bash

# Activate the llama.cpp Python virtual environment
source ~/git/llama.cpp/.venv/bin/activate

# Path to llama-quantize
QUANTIZER=~/git/llama.cpp/build/bin/llama-quantize

# Detect thread count for max performance (macOS)
THREADS=$(sysctl -n hw.logicalcpu)
echo "Detected $THREADS threads."

# Find the input file (first GGUF in the current directory whose name ends in F16.gguf or f16.gguf)
INPUT_FILE=$(find . -maxdepth 1 -name "*[Ff]16.gguf" | head -n 1)

if [ -z "$INPUT_FILE" ]; then
    echo "Error: No F16 GGUF file found in the current directory."
    exit 1
fi

# Strip the leading ./ that find prepends, for cleaner output filenames
INPUT_FILE=${INPUT_FILE#./}

echo "Found input file: $INPUT_FILE"

# List of quantization types requested
TYPES=(
    "IQ3_M"
    "IQ3_XS"
    "IQ3_XXS"
    "IQ4_NL"
    "IQ4_XS"
    "Q3_K_L"
    "Q3_K_M"
    "Q3_K_S"
    "Q3_K_XL"
    "Q4_0"
    "Q4_1"
    "Q4_K_L"
    "Q4_K_M"
    "Q4_K_S"
    "Q5_K_L"
    "Q5_K_M"
    "Q5_K_S"
    "Q6_K"
    "Q6_K_L"
    "Q8_0"
)
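# Note: the *_L and *_XL entries are community naming conventions (typically
# quants built with --output-tensor-type / --token-embedding-type Q8_0) rather
# than built-in ftypes; a stock llama-quantize may reject them, which the
# per-type error handling below reports.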

echo "Starting batch quantization..."
echo "----------------------------------------"

for TYPE in "${TYPES[@]}"; do
    # Construct the output filename by replacing F16 or f16 with the quant type.
    # Bash ${var/pattern/replacement} substitutes the first match only, so both
    # spellings are handled:
    OUTPUT_FILE="${INPUT_FILE/F16/$TYPE}"
    OUTPUT_FILE="${OUTPUT_FILE/f16/$TYPE}"
    
    # If substitution didn't happen (filename matches neither), just append type
    if [ "$OUTPUT_FILE" == "$INPUT_FILE" ]; then
        OUTPUT_FILE="${INPUT_FILE%.gguf}-$TYPE.gguf"
    fi
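    # e.g. with TYPE=Q4_K_M: "model-F16.gguf" -> "model-Q4_K_M.gguf"; a name
    # without F16/f16 (say "model.gguf") becomes "model-Q4_K_M.gguf" via the
    # fallback above.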

    echo "Quantizing to $TYPE..."
    "$QUANTIZER" "$INPUT_FILE" "$OUTPUT_FILE" "$TYPE" "$THREADS"
    
    EXIT_CODE=$?
    if [ $EXIT_CODE -eq 0 ]; then
        echo "✅ Successfully created $OUTPUT_FILE"

        # Check the file size and split if necessary (limit: 40 GB)
        # 40 GB in bytes = 40 * 1024^3 = 42949672960
        LIMIT_BYTES=42949672960
        FILE_SIZE=$(stat -f%z "$OUTPUT_FILE")
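        # Portability sketch (assumption): stat -f%z is the BSD/macOS form; the
        # GNU coreutils equivalent on Linux would be:
        #   FILE_SIZE=$(stat -c%s "$OUTPUT_FILE")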

        if [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
            echo "File size ($FILE_SIZE bytes) exceeds 40GB. Splitting into directory..."
            
            # Create directory name (remove .gguf extension)
            DIR_NAME="${OUTPUT_FILE%.gguf}"
            mkdir -p "$DIR_NAME"
            
            # Split tool path
            SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split
            
            echo "  Splitting '$OUTPUT_FILE' into '$DIR_NAME/'..."

            # Change to the new directory to run the split command
            pushd "$DIR_NAME" > /dev/null

            # Run the split: flags first, then the input file, then the output prefix
            "$SPLIT_TOOL" --split-max-size 40G "../$OUTPUT_FILE" "$(basename "$OUTPUT_FILE" .gguf)"
            
            SPLIT_EXIT=$?
            
            # Change back to original directory
            popd > /dev/null

            if [ $SPLIT_EXIT -eq 0 ]; then
                echo "✅ Split successful. Removing original large file."
                rm "$OUTPUT_FILE"
            else
                echo "❌ Splitting failed. Keeping original file."
            fi
        fi

    else
        echo "❌ Failed to create $OUTPUT_FILE (Error code: $EXIT_CODE)"
        echo "   (Note: '$TYPE' might not be a valid quantization type in this version of llama.cpp)"
    fi
    echo "----------------------------------------"
done

echo "Batch quantization complete."