|
{
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,

  # Fall back to BLAS only when no other accelerated backend is selected
  # AND a BLAS implementation is actually available for this platform.
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  # Metal only makes sense on Apple Silicon.
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,

  useMpi ? false,
  useRocm ? config.rocmSupport,
  enableCurl ? true,
  useVulkan ? false,
  # Overridden by the caller (e.g. the flake) with the real release version.
  llamaVersion ? "0.0.0",

  # CUDA builds need the stdenv matched to nvcc's supported host compiler;
  # everything below must use effectiveStdenv, never the raw stdenv
  # (enforced by the `throw` shadow in the let-block).
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  # When true, Metal shaders are compiled at build time via the host's
  # /usr/bin/xcrun — an impure step that also requires __noChroot below.
  precompileMetalShaders ? false,
}:
|
|
|
let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    ;

  # Deliberately shadow the `stdenv` argument so any accidental reference
  # below fails at evaluation time; all builds must go through
  # `effectiveStdenv` (which may be cudaPackages.backendStdenv).
  stdenv = throw "Use effectiveStdenv instead";
|
|
|
suffices = |
|
lib.optionals useBlas [ "BLAS" ] |
|
++ lib.optionals useCuda [ "CUDA" ] |
|
++ lib.optionals useMetalKit [ "MetalKit" ] |
|
++ lib.optionals useMpi [ "MPI" ] |
|
++ lib.optionals useRocm [ "ROCm" ] |
|
++ lib.optionals useVulkan [ "Vulkan" ]; |
|
|
|
pnameSuffix = |
|
strings.optionalString (suffices != [ ]) |
|
"-${strings.concatMapStringsSep "-" strings.toLower suffices}"; |
|
descriptionSuffix = strings.optionalString ( |
|
suffices != [ ] |
|
) ", accelerated with ${strings.concatStringsSep ", " suffices}"; |
|
|
|
  # Impure shim exposing the host's /usr/bin/xcrun to the build. Only added
  # to nativeBuildInputs when precompiling Metal shaders, the same condition
  # that sets __noChroot to relax the sandbox.
  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';
|
|
|
|
|
|
|
darwinBuildInputs = |
|
with darwin.apple_sdk.frameworks; |
|
[ |
|
Accelerate |
|
CoreVideo |
|
CoreGraphics |
|
] |
|
++ optionals useMetalKit [ MetalKit ]; |
|
|
|
cudaBuildInputs = with cudaPackages; [ |
|
cuda_cudart |
|
cuda_cccl |
|
libcublas |
|
]; |
|
|
|
rocmBuildInputs = with rocmPackages; [ |
|
clr |
|
hipblas |
|
rocblas |
|
]; |
|
|
|
vulkanBuildInputs = [ |
|
vulkan-headers |
|
vulkan-loader |
|
shaderc |
|
]; |
|
in |
|
|
|
effectiveStdenv.mkDerivation (finalAttrs: {
  # e.g. "llama-cpp-cuda" when only the CUDA backend is enabled.
  pname = "llama-cpp${pnameSuffix}";
  version = llamaVersion;
|
|
|
|
|
|
|
|
|
src = lib.cleanSourceWith { |
|
filter = |
|
name: type: |
|
let |
|
noneOf = builtins.all (x: !x); |
|
baseName = baseNameOf name; |
|
in |
|
noneOf [ |
|
(lib.hasSuffix ".nix" name) |
|
(lib.hasSuffix ".md" name) |
|
(lib.hasPrefix "." baseName) |
|
(baseName == "flake.lock") |
|
]; |
|
src = lib.cleanSource ../../.; |
|
}; |
|
|
|
postPatch = '' |
|
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \ |
|
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" |
|
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \ |
|
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" |
|
''; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
  # Relax the build sandbox only for the one configuration that must shell
  # out to the host's xcrun (see xcrunHost) to precompile Metal shaders.
  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
|
|
|
nativeBuildInputs = |
|
[ |
|
cmake |
|
ninja |
|
pkg-config |
|
git |
|
] |
|
++ optionals useCuda [ |
|
cudaPackages.cuda_nvcc |
|
|
|
autoAddDriverRunpath |
|
] |
|
++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ] |
|
++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ]; |
|
|
|
buildInputs = |
|
optionals effectiveStdenv.isDarwin darwinBuildInputs |
|
++ optionals useCuda cudaBuildInputs |
|
++ optionals useMpi [ mpi ] |
|
++ optionals useRocm rocmBuildInputs |
|
++ optionals useBlas [ blas ] |
|
++ optionals useVulkan vulkanBuildInputs |
|
++ optionals enableCurl [ curl ]; |
|
|
|
cmakeFlags = |
|
[ |
|
(cmakeBool "LLAMA_BUILD_SERVER" true) |
|
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic)) |
|
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) |
|
(cmakeBool "LLAMA_CURL" enableCurl) |
|
(cmakeBool "GGML_NATIVE" false) |
|
(cmakeBool "GGML_BLAS" useBlas) |
|
(cmakeBool "GGML_CUDA" useCuda) |
|
(cmakeBool "GGML_HIP" useRocm) |
|
(cmakeBool "GGML_METAL" useMetalKit) |
|
(cmakeBool "GGML_VULKAN" useVulkan) |
|
(cmakeBool "GGML_STATIC" enableStatic) |
|
] |
|
++ optionals useCuda [ |
|
( |
|
with cudaPackages.flags; |
|
cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( |
|
builtins.concatStringsSep ";" (map dropDot cudaCapabilities) |
|
) |
|
) |
|
] |
|
++ optionals useRocm [ |
|
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang") |
|
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets)) |
|
] |
|
++ optionals useMetalKit [ |
|
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") |
|
(cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders)) |
|
]; |
|
|
|
|
|
env = optionals useRocm { |
|
ROCM_PATH = "${rocmPackages.clr}"; |
|
HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode"; |
|
}; |
|
|
|
|
|
|
|
postInstall = '' |
|
mkdir -p $out/include |
|
cp $src/include/llama.h $out/include/ |
|
''; |
|
|
|
  meta = {
    # CUDA builds are excluded on Darwin.
    badPlatforms = optionals useCuda lib.platforms.darwin;

    # A MetalKit build is only possible on Darwin; any other combination
    # cannot succeed.
    broken = (useMetalKit && !effectiveStdenv.isDarwin);

    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    homepage = "https://github.com/ggerganov/llama.cpp/";
    license = lib.licenses.mit;

    # The build produces many executables; llama-cli is the primary one.
    mainProgram = "llama-cli";

    maintainers = with lib.maintainers; [
      philiptaron
      SomeoneSerge
    ];

    platforms = lib.platforms.all;
  };
})
|
|