Spaces:
Runtime error
Runtime error
Arrcttacsrks
committed on
Upload llama.cpp/ggml/CMakeLists.txt with huggingface_hub
Browse files- llama.cpp/ggml/CMakeLists.txt +269 -0
llama.cpp/ggml/CMakeLists.txt
ADDED
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# CMake 3.14 is required for add_link_options and implicit target directories.
cmake_minimum_required(VERSION 3.14)
project(ggml LANGUAGES C CXX)

include(CheckIncludeFileCXX)

# Have CMake export the compile commands of this build.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
6 |
+
|
7 |
+
# Fall back to a Release build when no build type was requested.
# The fallback is skipped entirely for Xcode and MSVC.
if (NOT (XCODE OR MSVC OR CMAKE_BUILD_TYPE))
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)

    # Values offered for the CMAKE_BUILD_TYPE cache entry.
    set_property(
        CACHE CMAKE_BUILD_TYPE
        PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()
|
11 |
+
|
12 |
+
# GGML_STANDALONE is ON only when this directory is the top of the source tree,
# i.e. ggml is not being pulled in by a parent project.
set(GGML_STANDALONE OFF)

if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
    set(GGML_STANDALONE ON)

    # Place runtime artifacts under <build>/bin.
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

    # configure project version
    # TODO
endif()
|
22 |
+
|
23 |
+
# Pick the default for BUILD_SHARED_LIBS: OFF for Emscripten and MinGW, ON otherwise.
if (EMSCRIPTEN)
    set(BUILD_SHARED_LIBS_DEFAULT OFF)

    # Only declared for Emscripten builds.
    option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON)
elseif (MINGW)
    set(BUILD_SHARED_LIBS_DEFAULT OFF)
else()
    set(BUILD_SHARED_LIBS_DEFAULT ON)
endif()

option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
36 |
+
|
37 |
+
#
|
38 |
+
# option list
|
39 |
+
#
|
40 |
+
|
41 |
+
# TODO: mark all options as advanced when not GGML_STANDALONE
|
42 |
+
|
43 |
+
# Defaults for the Metal and BLAS options: both off with the "Generic" BLAS vendor,
# switched on with the "Apple" vendor when building for an Apple platform.
set(GGML_METAL_DEFAULT       OFF)
set(GGML_BLAS_DEFAULT        OFF)
set(GGML_BLAS_VENDOR_DEFAULT "Generic")

if (APPLE)
    set(GGML_METAL_DEFAULT       ON)
    set(GGML_BLAS_DEFAULT        ON)
    set(GGML_BLAS_VENDOR_DEFAULT "Apple")
endif()
|
52 |
+
|
53 |
+
# GGML_NATIVE_DEFAULT is ON unless the build is a cross-compile.
if (NOT CMAKE_CROSSCOMPILING)
    set(GGML_NATIVE_DEFAULT ON)
else()
    set(GGML_NATIVE_DEFAULT OFF)
endif()
|
58 |
+
|
59 |
+
# defaults
# Any of these that is unset or evaluates to false is pinned to OFF;
# a value that is already true is left untouched.
foreach (ggml_default_var IN ITEMS GGML_LLAMAFILE_DEFAULT GGML_CUDA_GRAPHS_DEFAULT)
    if (NOT ${ggml_default_var})
        set(${ggml_default_var} OFF)
    endif()
endforeach()
|
67 |
+
|
68 |
+
# general
option(GGML_STATIC "ggml: static link libraries"          OFF)
option(GGML_NATIVE "ggml: enable -march=native flag"      ${GGML_NATIVE_DEFAULT})
option(GGML_LTO    "ggml: enable link time optimization"  OFF)
option(GGML_CCACHE "ggml: use ccache if available"        ON)

# debug
option(GGML_ALL_WARNINGS           "ggml: enable all compiler warnings"                   ON)
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
option(GGML_GPROF                  "ggml: enable gprof"                                   OFF)

# build
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)

# sanitizers (all disabled by default)
option(GGML_SANITIZE_THREAD    "ggml: enable thread sanitizer"    OFF)
option(GGML_SANITIZE_ADDRESS   "ggml: enable address sanitizer"   OFF)
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)
|
86 |
+
|
87 |
+
# instruction set specific
# INS_ENB is the default for GGML_AVX, GGML_AVX2, GGML_FMA and GGML_F16C below.
# It is ON only when GGML_NATIVE is off while GGML_NATIVE_DEFAULT is on,
# and OFF in every other case.
if (NOT GGML_NATIVE AND GGML_NATIVE_DEFAULT)
    set(INS_ENB ON)
else()
    set(INS_ENB OFF)
endif()

option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)

option(GGML_AVX         "ggml: enable AVX"         ${INS_ENB})
option(GGML_AVX2        "ggml: enable AVX2"        ${INS_ENB})
option(GGML_AVX512      "ggml: enable AVX512"      OFF)
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
option(GGML_AMX_TILE    "ggml: enable AMX-TILE"    OFF)
option(GGML_AMX_INT8    "ggml: enable AMX-INT8"    OFF)
option(GGML_AMX_BF16    "ggml: enable AMX-BF16"    OFF)
option(GGML_FMA         "ggml: enable FMA"         ${INS_ENB})
if (NOT MSVC)
    # in MSVC F16C is implied with AVX2/AVX512
    option(GGML_F16C    "ggml: enable F16C"        ${INS_ENB})
endif()
option(GGML_LASX        "ggml: enable lasx"        ON)
option(GGML_LSX         "ggml: enable lsx"         ON)
option(GGML_SVE         "ggml: enable SVE"         OFF)
|
112 |
+
|
113 |
+
# The Windows version cache entry only exists on WIN32 builds.
if (WIN32)
    set(GGML_WIN_VER
        "0x602"
        CACHE STRING "ggml: Windows Version")
endif()

# ggml core
set(GGML_SCHED_MAX_COPIES
    "4"
    CACHE STRING "ggml: max input copies for pipeline parallelism")
|
119 |
+
|
120 |
+
# 3rd party libs / backends
# Declaration order matters: GGML_METAL_EMBED_LIBRARY takes its default from GGML_METAL.

# Accelerate / BLAS / llamafile
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
option(GGML_BLAS       "ggml: use BLAS"                    ${GGML_BLAS_DEFAULT})
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING "ggml: BLAS library vendor")
option(GGML_LLAMAFILE  "ggml: use LLAMAFILE"               ${GGML_LLAMAFILE_DEFAULT})

# CUDA / MUSA
option(GGML_CUDA              "ggml: use CUDA"                                  OFF)
option(GGML_MUSA              "ggml: use MUSA"                                  OFF)
option(GGML_CUDA_FORCE_DMMV   "ggml: use dmmv instead of mmvq CUDA kernels"     OFF)
option(GGML_CUDA_FORCE_MMQ    "ggml: use mmq kernels instead of cuBLAS"         OFF)
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels"  OFF)
set(GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
set(GGML_CUDA_MMV_Y  "1"  CACHE STRING "ggml: y block size for mmv CUDA kernels")
option(GGML_CUDA_F16          "ggml: use 16 bit floats for some calculations"   OFF)
set(GGML_CUDA_KQUANTS_ITER        "2"   CACHE STRING "ggml: iters./thread per block for Q2_K/Q6_K")
set(GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING "ggml: max. batch size for using peer access")
option(GGML_CUDA_NO_PEER_COPY  "ggml: do not use peer to peer copies"           OFF)
option(GGML_CUDA_NO_VMM        "ggml: do not try to use CUDA VMM"               OFF)
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention"    OFF)
option(GGML_CUDA_GRAPHS        "ggml: use CUDA graphs (llama.cpp only)"         ${GGML_CUDA_GRAPHS_DEFAULT})

# HIP
option(GGML_HIPBLAS "ggml: use hipBLAS"                         OFF)
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)

# Vulkan
option(GGML_VULKAN                   "ggml: use Vulkan"                        OFF)
option(GGML_VULKAN_CHECK_RESULTS     "ggml: run Vulkan op checks"              OFF)
option(GGML_VULKAN_DEBUG             "ggml: enable Vulkan debug output"        OFF)
option(GGML_VULKAN_MEMORY_DEBUG      "ggml: enable Vulkan memory debug output" OFF)
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info"   OFF)
option(GGML_VULKAN_PERF              "ggml: enable Vulkan perf output"         OFF)
option(GGML_VULKAN_VALIDATE          "ggml: enable Vulkan validation"          OFF)
option(GGML_VULKAN_RUN_TESTS         "ggml: run Vulkan tests"                  OFF)

# Kompute
option(GGML_KOMPUTE "ggml: use Kompute" OFF)

# Metal
option(GGML_METAL               "ggml: use Metal"                         ${GGML_METAL_DEFAULT})
option(GGML_METAL_USE_BF16      "ggml: use bfloat if available"           OFF)
option(GGML_METAL_NDEBUG        "ggml: disable Metal debugging"           OFF)
option(GGML_METAL_SHADER_DEBUG  "ggml: compile Metal with -fno-fast-math" OFF)
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library"               ${GGML_METAL})
set(GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING "ggml: metal minimum macOS version")
set(GGML_METAL_STD                "" CACHE STRING "ggml: metal standard version (-std flag)")

# OpenMP / RPC / AMX
option(GGML_OPENMP "ggml: use OpenMP" ON)
option(GGML_RPC    "ggml: use RPC"    OFF)
option(GGML_AMX    "ggml: use AMX"    OFF)

# SYCL
option(GGML_SYCL     "ggml: use SYCL"                                OFF)
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
set(GGML_SYCL_TARGET "INTEL" CACHE STRING "ggml: sycl target device")
|
170 |
+
|
171 |
+
# extra artifacts
# Tests and examples default to ON exactly when GGML_STANDALONE is ON.
option(GGML_BUILD_TESTS
       "ggml: build tests"
       ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES
       "ggml: build examples"
       ${GGML_STANDALONE})
|
174 |
+
|
175 |
+
#
|
176 |
+
# dependencies
|
177 |
+
#
|
178 |
+
|
179 |
+
# C is built as C11, and that standard is mandatory.
set(CMAKE_C_STANDARD          11)
set(CMAKE_C_STANDARD_REQUIRED true)

# C++ is built as C++11, raised to C++17 when GGML_SYCL is enabled; also mandatory.
set(CMAKE_CXX_STANDARD 11)
if (GGML_SYCL)
    set(CMAKE_CXX_STANDARD 17)
endif()
set(CMAKE_CXX_STANDARD_REQUIRED true)

# Threads is a hard dependency; prefer the -pthread flag when locating it.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
|
192 |
+
|
193 |
+
#
|
194 |
+
# build the library
|
195 |
+
#
|
196 |
+
|
197 |
+
# The library itself lives in src/.
add_subdirectory(src)

#
# tests and examples
#

# Tests are built, and CTest enabled, only when GGML_BUILD_TESTS is set.
if (GGML_BUILD_TESTS)
    enable_testing()
    add_subdirectory(tests)
endif()

# Examples are built only when GGML_BUILD_EXAMPLES is set.
if (GGML_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif()
|
211 |
+
|
212 |
+
#
|
213 |
+
# install
|
214 |
+
#
|
215 |
+
|
216 |
+
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# all public headers
# Listed by file name, then prefixed with include/ in a single step.
set(GGML_PUBLIC_HEADERS
    ggml.h
    ggml-cpu.h
    ggml-alloc.h
    ggml-backend.h
    ggml-blas.h
    ggml-cann.h
    ggml-cuda.h
    ggml-kompute.h
    ggml-metal.h
    ggml-rpc.h
    ggml-sycl.h
    ggml-vulkan.h)
list(TRANSFORM GGML_PUBLIC_HEADERS PREPEND "include/")

# Attach the header list to the ggml target so install() can pick it up.
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
#if (GGML_METAL)
#    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
#endif()
install(TARGETS ggml PUBLIC_HEADER)

# The library artifact is installed only for shared builds.
if (BUILD_SHARED_LIBS)
    install(TARGETS ggml LIBRARY)
endif()
|
243 |
+
|
244 |
+
# With Metal enabled, install the shader source into the binary directory;
# default.metallib is installed as well unless the library is embedded.
if (GGML_METAL)
    install(
        FILES src/ggml-metal.metal
        DESTINATION ${CMAKE_INSTALL_BINDIR}
        PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ)

    if (NOT GGML_METAL_EMBED_LIBRARY)
        install(
            FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
            DESTINATION ${CMAKE_INSTALL_BINDIR})
    endif()
endif()
|
261 |
+
|
262 |
+
# In standalone builds, generate ggml.pc from its template and install it.
if (GGML_STANDALONE)
    # @ONLY: substitute @VAR@ placeholders only, leaving ${...} in the template alone.
    configure_file(
        ${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in
        ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        @ONLY)

    install(
        FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        DESTINATION share/pkgconfig)
endif()
|