Mat17892 committed on
Commit b664585
1 Parent(s): b97d649
This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +23 -0
  2. llama.cpp/.clang-format +161 -0
  3. llama.cpp/.clang-tidy +26 -0
  4. llama.cpp/.devops/cloud-v-pipeline +22 -0
  5. llama.cpp/.devops/full-cuda.Dockerfile +33 -0
  6. llama.cpp/.devops/full-musa.Dockerfile +33 -0
  7. llama.cpp/.devops/full-rocm.Dockerfile +50 -0
  8. llama.cpp/.devops/full.Dockerfile +25 -0
  9. llama.cpp/.devops/llama-cli-cann.Dockerfile +44 -0
  10. llama.cpp/.devops/llama-cli-cuda.Dockerfile +38 -0
  11. llama.cpp/.devops/llama-cli-intel.Dockerfile +28 -0
  12. llama.cpp/.devops/llama-cli-musa.Dockerfile +38 -0
  13. llama.cpp/.devops/llama-cli-rocm.Dockerfile +45 -0
  14. llama.cpp/.devops/llama-cli-vulkan.Dockerfile +27 -0
  15. llama.cpp/.devops/llama-cli.Dockerfile +23 -0
  16. llama.cpp/.devops/llama-cpp-cuda.srpm.spec +83 -0
  17. llama.cpp/.devops/llama-cpp.srpm.spec +85 -0
  18. llama.cpp/.devops/llama-server-cuda.Dockerfile +43 -0
  19. llama.cpp/.devops/llama-server-intel.Dockerfile +34 -0
  20. llama.cpp/.devops/llama-server-musa.Dockerfile +43 -0
  21. llama.cpp/.devops/llama-server-rocm.Dockerfile +54 -0
  22. llama.cpp/.devops/llama-server-vulkan.Dockerfile +31 -0
  23. llama.cpp/.devops/llama-server.Dockerfile +41 -0
  24. llama.cpp/.devops/nix/apps.nix +21 -0
  25. llama.cpp/.devops/nix/devshells.nix +52 -0
  26. llama.cpp/.devops/nix/docker.nix +37 -0
  27. llama.cpp/.devops/nix/jetson-support.nix +39 -0
  28. llama.cpp/.devops/nix/nixpkgs-instances.nix +45 -0
  29. llama.cpp/.devops/nix/package-gguf-py.nix +36 -0
  30. llama.cpp/.devops/nix/package.nix +246 -0
  31. llama.cpp/.devops/nix/python-scripts.nix +66 -0
  32. llama.cpp/.devops/nix/scope.nix +41 -0
  33. llama.cpp/.devops/nix/sif.nix +27 -0
  34. llama.cpp/.devops/tools.sh +41 -0
  35. llama.cpp/.dockerignore +20 -0
  36. llama.cpp/.ecrc +6 -0
  37. llama.cpp/.editorconfig +42 -0
  38. llama.cpp/.flake8 +17 -0
  39. llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +77 -0
  40. llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
  41. llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml +81 -0
  42. llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
  43. llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml +52 -0
  44. llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
  45. llama.cpp/.github/ISSUE_TEMPLATE/config.yml +11 -0
  46. llama.cpp/.github/labeler.yml +86 -0
  47. llama.cpp/.github/pull_request_template.md +1 -0
  48. llama.cpp/.github/workflows/bench.yml.disabled +315 -0
  49. llama.cpp/.github/workflows/build.yml +1416 -0
  50. llama.cpp/.github/workflows/close-issue.yml +28 -0
.gitattributes CHANGED
@@ -35,3 +35,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  llama_lora_model_1/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  outputs/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ llama.cpp/build/bin/llama-export-lora filter=lfs diff=lfs merge=lfs -text
39
+ llama.cpp/build/bin/llama-imatrix filter=lfs diff=lfs merge=lfs -text
40
+ llama.cpp/build/bin/llama-llava-cli filter=lfs diff=lfs merge=lfs -text
41
+ llama.cpp/build/bin/llama-minicpmv-cli filter=lfs diff=lfs merge=lfs -text
42
+ llama.cpp/build/bin/llama-perplexity filter=lfs diff=lfs merge=lfs -text
43
+ llama.cpp/build/bin/llama-server filter=lfs diff=lfs merge=lfs -text
44
+ llama.cpp/build/common/libcommon.a filter=lfs diff=lfs merge=lfs -text
45
+ llama.cpp/build/examples/server/CMakeFiles/llama-server.dir/server.cpp.o filter=lfs diff=lfs merge=lfs -text
46
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama.cpp.o filter=lfs diff=lfs merge=lfs -text
47
+ llama.cpp/build/src/libllama.so filter=lfs diff=lfs merge=lfs -text
48
+ llama.cpp/models/ggml-vocab-aquila.gguf filter=lfs diff=lfs merge=lfs -text
49
+ llama.cpp/models/ggml-vocab-baichuan.gguf filter=lfs diff=lfs merge=lfs -text
50
+ llama.cpp/models/ggml-vocab-command-r.gguf filter=lfs diff=lfs merge=lfs -text
51
+ llama.cpp/models/ggml-vocab-deepseek-coder.gguf filter=lfs diff=lfs merge=lfs -text
52
+ llama.cpp/models/ggml-vocab-deepseek-llm.gguf filter=lfs diff=lfs merge=lfs -text
53
+ llama.cpp/models/ggml-vocab-falcon.gguf filter=lfs diff=lfs merge=lfs -text
54
+ llama.cpp/models/ggml-vocab-gpt-2.gguf filter=lfs diff=lfs merge=lfs -text
55
+ llama.cpp/models/ggml-vocab-gpt-neox.gguf filter=lfs diff=lfs merge=lfs -text
56
+ llama.cpp/models/ggml-vocab-llama-bpe.gguf filter=lfs diff=lfs merge=lfs -text
57
+ llama.cpp/models/ggml-vocab-mpt.gguf filter=lfs diff=lfs merge=lfs -text
58
+ llama.cpp/models/ggml-vocab-qwen2.gguf filter=lfs diff=lfs merge=lfs -text
59
+ llama.cpp/models/ggml-vocab-refact.gguf filter=lfs diff=lfs merge=lfs -text
60
+ llama.cpp/models/ggml-vocab-starcoder.gguf filter=lfs diff=lfs merge=lfs -text
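The rules above route the committed build binaries, object files, and vocab GGUF models through Git LFS. A minimal sketch of producing equivalent rules with the git-lfs CLI (assuming git-lfs is installed; the tracked patterns are illustrative):

    # One-time LFS setup in the clone, then track large artifacts; each
    # `git lfs track` appends a matching "filter=lfs diff=lfs merge=lfs -text" rule
    git lfs install
    git lfs track "llama.cpp/build/bin/llama-server"
    git lfs track "llama.cpp/models/ggml-vocab-*.gguf"
    git add .gitattributes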
llama.cpp/.clang-format ADDED
@@ -0,0 +1,161 @@
1
+ ---
2
+ Language: Cpp
3
+ AlignAfterOpenBracket: Align
4
+ AlignArrayOfStructures: Left
5
+ AlignConsecutiveAssignments: AcrossComments
6
+ AlignConsecutiveBitFields: AcrossComments
7
+ AlignConsecutiveDeclarations: AcrossComments
8
+ AlignConsecutiveMacros: AcrossComments
9
+ # AlignConsecutiveShortCaseStatements: AcrossComments
10
+ AlignEscapedNewlines: Left # LeftWithLastLine
11
+ AlignOperands: Align
12
+ AlignTrailingComments:
13
+ Kind: Always
14
+ OverEmptyLines: 1
15
+ AllowAllArgumentsOnNextLine: true
16
+ AllowAllParametersOfDeclarationOnNextLine: false
17
+ # AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
18
+ AllowShortBlocksOnASingleLine: Never
19
+ AllowShortCaseLabelsOnASingleLine: false
20
+ AllowShortFunctionsOnASingleLine: Inline
21
+ AllowShortIfStatementsOnASingleLine: Never
22
+ AllowShortLambdasOnASingleLine: Inline
23
+ AllowShortLoopsOnASingleLine: false
24
+ AlwaysBreakBeforeMultilineStrings: true
25
+ BinPackArguments: true
26
+ BinPackParameters: true # OnePerLine
27
+ BitFieldColonSpacing: Both
28
+ BreakBeforeBraces: Custom # Attach
29
+ BraceWrapping:
30
+ AfterCaseLabel: true
31
+ AfterClass: false
32
+ AfterControlStatement: false
33
+ AfterEnum: false
34
+ AfterFunction: false
35
+ AfterNamespace: false
36
+ AfterObjCDeclaration: false
37
+ AfterStruct: false
38
+ AfterUnion: false
39
+ AfterExternBlock: false
40
+ BeforeCatch: false
41
+ BeforeElse: false
42
+ BeforeLambdaBody: false
43
+ BeforeWhile: false
44
+ IndentBraces: false
45
+ SplitEmptyFunction: false
46
+ SplitEmptyRecord: false
47
+ SplitEmptyNamespace: false
48
+ # BreakAdjacentStringLiterals: true
49
+ BreakAfterAttributes: Never
50
+ BreakBeforeBinaryOperators: None
51
+ BreakBeforeInlineASMColon: OnlyMultiline
52
+ BreakBeforeTernaryOperators: false
53
+ # BreakBinaryOperations: Never
54
+ BreakConstructorInitializers: AfterColon
55
+ # BreakFunctionDefinitionParameters: false
56
+ BreakInheritanceList: AfterComma
57
+ BreakStringLiterals: true
58
+ # BreakTemplateDeclarations: Yes
59
+ ColumnLimit: 120
60
+ CommentPragmas: '^ IWYU pragma:'
61
+ CompactNamespaces: false
62
+ ConstructorInitializerIndentWidth: 4
63
+ ContinuationIndentWidth: 4
64
+ Cpp11BracedListStyle: false
65
+ DerivePointerAlignment: false
66
+ DisableFormat: false
67
+ EmptyLineBeforeAccessModifier: Leave
68
+ EmptyLineAfterAccessModifier: Never
69
+ ExperimentalAutoDetectBinPacking: false
70
+ FixNamespaceComments: true
71
+ IncludeBlocks: Regroup
72
+ IncludeCategories:
73
+ - Regex: '^<.*\.h>'
74
+ Priority: 1
75
+ SortPriority: 0
76
+ - Regex: '^<.*'
77
+ Priority: 2
78
+ SortPriority: 0
79
+ - Regex: '.*'
80
+ Priority: 3
81
+ SortPriority: 0
82
+ IncludeIsMainRegex: '([-_](test|unittest))?$'
83
+ IncludeIsMainSourceRegex: ''
84
+ IndentAccessModifiers: false
85
+ IndentCaseBlocks: true
86
+ IndentCaseLabels: true
87
+ IndentExternBlock: NoIndent
88
+ IndentGotoLabels: false
89
+ IndentPPDirectives: AfterHash
90
+ IndentWidth: 4
91
+ IndentWrappedFunctionNames: false
92
+ InsertBraces: true # NOTE: may lead to incorrect formatting
93
+ InsertNewlineAtEOF: true
94
+ JavaScriptQuotes: Leave
95
+ JavaScriptWrapImports: true
96
+ KeepEmptyLinesAtTheStartOfBlocks: false
97
+ LambdaBodyIndentation: Signature
98
+ LineEnding: LF
99
+ MacroBlockBegin: ''
100
+ MacroBlockEnd: ''
101
+ MaxEmptyLinesToKeep: 1
102
+ NamespaceIndentation: None
103
+ ObjCBinPackProtocolList: Auto
104
+ ObjCBlockIndentWidth: 4
105
+ ObjCSpaceAfterProperty: true
106
+ ObjCSpaceBeforeProtocolList: true
107
+ PPIndentWidth: -1
108
+ PackConstructorInitializers: CurrentLine
109
+ PenaltyBreakAssignment: 2
110
+ PenaltyBreakBeforeFirstCallParameter: 1
111
+ PenaltyBreakComment: 300
112
+ PenaltyBreakFirstLessLess: 120
113
+ PenaltyBreakString: 1000
114
+ PenaltyBreakTemplateDeclaration: 10
115
+ PenaltyExcessCharacter: 1000000
116
+ PenaltyReturnTypeOnItsOwnLine: 200
117
+ PointerAlignment: Middle
118
+ QualifierAlignment: Left
119
+ #QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
120
+ RawStringFormats:
121
+ - Language: Cpp
122
+ Delimiters:
123
+ - cc
124
+ - CC
125
+ - cpp
126
+ - Cpp
127
+ - CPP
128
+ - 'c++'
129
+ - 'C++'
130
+ CanonicalDelimiter: ''
131
+ ReferenceAlignment: Middle
132
+ ReflowComments: false # IndentOnly
133
+ SeparateDefinitionBlocks: Always
134
+ SortIncludes: CaseInsensitive
135
+ SortUsingDeclarations: LexicographicNumeric
136
+ SpaceAfterCStyleCast: true
137
+ SpaceAfterLogicalNot: false
138
+ SpaceAfterTemplateKeyword: true
139
+ SpaceBeforeAssignmentOperators: true
140
+ SpaceBeforeCpp11BracedList: false
141
+ SpaceBeforeCtorInitializerColon: true
142
+ SpaceBeforeInheritanceColon: true
143
+ SpaceBeforeParens: ControlStatements
144
+ SpaceBeforeRangeBasedForLoopColon: true
145
+ SpaceInEmptyBlock: false
146
+ SpaceInEmptyParentheses: false
147
+ SpacesBeforeTrailingComments: 2
148
+ SpacesInAngles: Never
149
+ SpacesInContainerLiterals: true
150
+ SpacesInLineCommentPrefix:
151
+ Minimum: 1
152
+ Maximum: -1
153
+ SpacesInParentheses: false
154
+ SpacesInSquareBrackets: false
155
+ SpaceBeforeSquareBrackets: false
156
+ Standard: c++17
157
+ TabWidth: 4
158
+ UseTab: Never
159
+ WhitespaceSensitiveMacros: ['STRINGIZE']
160
+ ...
161
+
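The style above targets C++17 with a 120-column limit, 4-space indents, and attached braces. A hedged sketch of applying it from the repository root (assumes clang-format is on PATH; the file path is illustrative):

    # Reformat one file in place using the nearest .clang-format
    clang-format -i src/llama.cpp
    # Check only: report violations and exit non-zero without editing the file
    clang-format --dry-run --Werror src/llama.cpp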
llama.cpp/.clang-tidy ADDED
@@ -0,0 +1,26 @@
1
+ ---
2
+ Checks: >
3
+ bugprone-*,
4
+ -bugprone-easily-swappable-parameters,
5
+ -bugprone-implicit-widening-of-multiplication-result,
6
+ -bugprone-misplaced-widening-cast,
7
+ -bugprone-narrowing-conversions,
8
+ readability-*,
9
+ -readability-avoid-unconditional-preprocessor-if,
10
+ -readability-function-cognitive-complexity,
11
+ -readability-identifier-length,
12
+ -readability-implicit-bool-conversion,
13
+ -readability-magic-numbers,
14
+ -readability-uppercase-literal-suffix,
15
+ -readability-simplify-boolean-expr,
16
+ clang-analyzer-*,
17
+ -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
18
+ performance-*,
19
+ portability-*,
20
+ -portability-simd-intrinsics,
21
+ misc-*,
22
+ -misc-const-correctness,
23
+ -misc-non-private-member-variables-in-classes,
24
+ -misc-no-recursion,
25
+ -misc-use-anonymous-namespace,
26
+ FormatStyle: none
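The check set enables the bugprone, readability, clang-analyzer, performance, portability, and misc groups minus the listed exclusions. A sketch of a local run, assuming a CMake compilation database (paths are illustrative):

    # Export compile_commands.json, then lint a single translation unit
    cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    clang-tidy -p build src/llama.cpp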
llama.cpp/.devops/cloud-v-pipeline ADDED
@@ -0,0 +1,22 @@
1
+ node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
2
+ stage('Cleanup'){
3
+ cleanWs() // Cleaning previous CI build in workspace
4
+ }
5
+ stage('checkout repo'){
6
+ retry(5){ // Retry if the cloning fails due to some reason
7
+ checkout scm // Clone the repo on Runner
8
+ }
9
+ }
10
+ stage('Compiling llama.cpp'){
11
+ sh'''#!/bin/bash
12
+ make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
13
+ '''
14
+ }
15
+ stage('Running llama.cpp'){
16
+ sh'''#!/bin/bash
17
+ module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
18
+ qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
19
+ cat llama_log.txt # Printing results
20
+ '''
21
+ }
22
+ }
llama.cpp/.devops/full-cuda.Dockerfile ADDED
@@ -0,0 +1,33 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG CUDA_VERSION=12.6.0
4
+ # Target the CUDA build image
5
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+
7
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
8
+
9
+ # CUDA architecture to build for (defaults to all supported archs)
10
+ ARG CUDA_DOCKER_ARCH=default
11
+
12
+ RUN apt-get update && \
13
+ apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
14
+
15
+ COPY requirements.txt requirements.txt
16
+ COPY requirements requirements
17
+
18
+ RUN pip install --upgrade pip setuptools wheel \
19
+ && pip install -r requirements.txt
20
+
21
+ WORKDIR /app
22
+
23
+ COPY . .
24
+
25
+ # Use the default CUDA archs if not specified
26
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
27
+ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
28
+ fi && \
29
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
30
+ cmake --build build --config Release -j$(nproc) && \
31
+ cp build/bin/* .
32
+
33
+ ENTRYPOINT ["/app/.devops/tools.sh"]
llama.cpp/.devops/full-musa.Dockerfile ADDED
@@ -0,0 +1,33 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG MUSA_VERSION=rc3.1.0
4
+ # Target the MUSA build image
5
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+
7
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
8
+
9
+ # MUSA architecture to build for (defaults to all supported archs)
10
+ ARG MUSA_DOCKER_ARCH=default
11
+
12
+ RUN apt-get update && \
13
+ apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
14
+
15
+ COPY requirements.txt requirements.txt
16
+ COPY requirements requirements
17
+
18
+ RUN pip install --upgrade pip setuptools wheel \
19
+ && pip install -r requirements.txt
20
+
21
+ WORKDIR /app
22
+
23
+ COPY . .
24
+
25
+ # Use the default MUSA archs if not specified
26
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
27
+ export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
28
+ fi && \
29
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
30
+ cmake --build build --config Release -j$(nproc) && \
31
+ cp build/bin/* .
32
+
33
+ ENTRYPOINT ["/app/.devops/tools.sh"]
llama.cpp/.devops/full-rocm.Dockerfile ADDED
@@ -0,0 +1,50 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ # This needs to generally match the container host's environment.
4
+ ARG ROCM_VERSION=5.6
5
+
6
+ # Target the ROCm build image
7
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
8
+
9
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
10
+
11
+ # Unless otherwise specified, we make a fat build.
12
+ # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
13
+ # This is mostly tied to rocBLAS supported archs.
14
+ ARG ROCM_DOCKER_ARCH="\
15
+ gfx803 \
16
+ gfx900 \
17
+ gfx906 \
18
+ gfx908 \
19
+ gfx90a \
20
+ gfx1010 \
21
+ gfx1030 \
22
+ gfx1100 \
23
+ gfx1101 \
24
+ gfx1102"
25
+
26
+ COPY requirements.txt requirements.txt
27
+ COPY requirements requirements
28
+
29
+ RUN pip install --upgrade pip setuptools wheel \
30
+ && pip install -r requirements.txt
31
+
32
+ WORKDIR /app
33
+
34
+ COPY . .
35
+
36
+ # Set AMDGPU targets
37
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
38
+ # Enable ROCm
39
+ ENV GGML_HIPBLAS=1
40
+ ENV CC=/opt/rocm/llvm/bin/clang
41
+ ENV CXX=/opt/rocm/llvm/bin/clang++
42
+
43
+ # Enable cURL
44
+ ENV LLAMA_CURL=1
45
+ RUN apt-get update && \
46
+ apt-get install -y libcurl4-openssl-dev
47
+
48
+ RUN make -j$(nproc)
49
+
50
+ ENTRYPOINT ["/app/.devops/tools.sh"]
llama.cpp/.devops/full.Dockerfile ADDED
@@ -0,0 +1,25 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ RUN apt-get update && \
6
+ apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
7
+
8
+ COPY requirements.txt requirements.txt
9
+ COPY requirements requirements
10
+
11
+ RUN pip install --upgrade pip setuptools wheel \
12
+ && pip install -r requirements.txt
13
+
14
+ WORKDIR /app
15
+
16
+ COPY . .
17
+
18
+ ENV LLAMA_CURL=1
19
+
20
+
21
+ RUN make -j$(nproc)
22
+
23
+ ENV LC_ALL=C.utf8
24
+
25
+ ENTRYPOINT ["/app/.devops/tools.sh"]
llama.cpp/.devops/llama-cli-cann.Dockerfile ADDED
@@ -0,0 +1,44 @@
1
+ ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
2
+
3
+ FROM ascendai/cann:$ASCEND_VERSION AS build
4
+
5
+ WORKDIR /app
6
+
7
+ COPY . .
8
+
9
+ RUN yum install -y gcc g++ cmake make
10
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
11
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
12
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
13
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
14
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
15
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
16
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
17
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
18
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
19
+
20
+ # Find libascend_hal.so, because the driver hasn't been mounted.
21
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
22
+
23
+ RUN echo "Building with static libs" && \
24
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
25
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
26
+ cmake --build build --config Release --target llama-cli
27
+
28
+ # TODO: use image with NNRT
29
+ FROM ascendai/cann:$ASCEND_VERSION AS runtime
30
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
31
+
32
+ ENV LC_ALL=C.utf8
33
+
34
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
35
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
36
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
37
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
38
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
39
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
40
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
41
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
42
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
43
+
44
+ ENTRYPOINT ["/llama-cli" ]
llama.cpp/.devops/llama-cli-cuda.Dockerfile ADDED
@@ -0,0 +1,38 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG CUDA_VERSION=12.6.0
4
+ # Target the CUDA build image
5
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+ # Target the CUDA runtime image
7
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10
+
11
+ # CUDA architecture to build for (defaults to all supported archs)
12
+ ARG CUDA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y build-essential git cmake
16
+
17
+ WORKDIR /app
18
+
19
+ COPY . .
20
+
21
+ # Use the default CUDA archs if not specified
22
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
23
+ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
24
+ fi && \
25
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
26
+ cmake --build build --config Release --target llama-cli -j$(nproc) && \
27
+ mkdir -p /app/lib && \
28
+ find build -name "*.so" -exec cp {} /app/lib \;
29
+
30
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
31
+
32
+ RUN apt-get update && \
33
+ apt-get install -y libgomp1
34
+
35
+ COPY --from=build /app/lib/ /
36
+ COPY --from=build /app/build/bin/llama-cli /
37
+
38
+ ENTRYPOINT [ "/llama-cli" ]
llama.cpp/.devops/llama-cli-intel.Dockerfile ADDED
@@ -0,0 +1,28 @@
1
+ ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04
2
+
3
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
4
+
5
+ ARG GGML_SYCL_F16=OFF
6
+ RUN apt-get update && \
7
+ apt-get install -y git
8
+
9
+ WORKDIR /app
10
+
11
+ COPY . .
12
+
13
+ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
14
+ echo "GGML_SYCL_F16 is set" && \
15
+ export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
16
+ fi && \
17
+ echo "Building with static libs" && \
18
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
19
+ ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
20
+ cmake --build build --config Release --target llama-cli
21
+
22
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
23
+
24
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
25
+
26
+ ENV LC_ALL=C.utf8
27
+
28
+ ENTRYPOINT [ "/llama-cli" ]
llama.cpp/.devops/llama-cli-musa.Dockerfile ADDED
@@ -0,0 +1,38 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG MUSA_VERSION=rc3.1.0
4
+ # Target the MUSA build image
5
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+ # Target the MUSA runtime image
7
+ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
10
+
11
+ # MUSA architecture to build for (defaults to all supported archs)
12
+ ARG MUSA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y build-essential git cmake
16
+
17
+ WORKDIR /app
18
+
19
+ COPY . .
20
+
21
+ # Use the default MUSA archs if not specified
22
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
23
+ export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
24
+ fi && \
25
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
26
+ cmake --build build --config Release --target llama-cli -j$(nproc) && \
27
+ mkdir -p /app/lib && \
28
+ find build -name "*.so" -exec cp {} /app/lib \;
29
+
30
+ FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
31
+
32
+ RUN apt-get update && \
33
+ apt-get install -y libgomp1
34
+
35
+ COPY --from=build /app/lib/ /
36
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
37
+
38
+ ENTRYPOINT [ "/llama-cli" ]
llama.cpp/.devops/llama-cli-rocm.Dockerfile ADDED
@@ -0,0 +1,45 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ # This needs to generally match the container host's environment.
4
+ ARG ROCM_VERSION=5.6
5
+
6
+ # Target the ROCm build image
7
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
8
+
9
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
10
+
11
+ # Unless otherwise specified, we make a fat build.
12
+ # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
13
+ # This is mostly tied to rocBLAS supported archs.
14
+ ARG ROCM_DOCKER_ARCH="\
15
+ gfx803 \
16
+ gfx900 \
17
+ gfx906 \
18
+ gfx908 \
19
+ gfx90a \
20
+ gfx1010 \
21
+ gfx1030 \
22
+ gfx1100 \
23
+ gfx1101 \
24
+ gfx1102"
25
+
26
+ COPY requirements.txt requirements.txt
27
+ COPY requirements requirements
28
+
29
+ RUN pip install --upgrade pip setuptools wheel \
30
+ && pip install -r requirements.txt
31
+
32
+ WORKDIR /app
33
+
34
+ COPY . .
35
+
36
+ # Set AMDGPU targets
37
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
38
+ # Enable ROCm
39
+ ENV GGML_HIPBLAS=1
40
+ ENV CC=/opt/rocm/llvm/bin/clang
41
+ ENV CXX=/opt/rocm/llvm/bin/clang++
42
+
43
+ RUN make -j$(nproc) llama-cli
44
+
45
+ ENTRYPOINT [ "/app/llama-cli" ]
llama.cpp/.devops/llama-cli-vulkan.Dockerfile ADDED
1
+ ARG UBUNTU_VERSION=jammy
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ # Install build tools
6
+ RUN apt update && apt install -y git build-essential cmake wget libgomp1
7
+
8
+ # Install Vulkan SDK
9
+ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
10
+ wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
11
+ apt update -y && \
12
+ apt-get install -y vulkan-sdk
13
+
14
+ # Build it
15
+ WORKDIR /app
16
+ COPY . .
17
+ RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 && \
18
+ cmake --build build --config Release --target llama-cli
19
+
20
+ # Clean up
21
+ WORKDIR /
22
+ RUN cp /app/build/bin/llama-cli /llama-cli && \
23
+ rm -rf /app
24
+
25
+ ENV LC_ALL=C.utf8
26
+
27
+ ENTRYPOINT [ "/llama-cli" ]
llama.cpp/.devops/llama-cli.Dockerfile ADDED
@@ -0,0 +1,23 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ RUN apt-get update && \
6
+ apt-get install -y build-essential git
7
+
8
+ WORKDIR /app
9
+
10
+ COPY . .
11
+
12
+ RUN make -j$(nproc) llama-cli
13
+
14
+ FROM ubuntu:$UBUNTU_VERSION AS runtime
15
+
16
+ RUN apt-get update && \
17
+ apt-get install -y libgomp1
18
+
19
+ COPY --from=build /app/llama-cli /llama-cli
20
+
21
+ ENV LC_ALL=C.utf8
22
+
23
+ ENTRYPOINT [ "/llama-cli" ]
llama.cpp/.devops/llama-cpp-cuda.srpm.spec ADDED
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - boeroboy@gmail.com
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
10
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
11
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
12
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
13
+ # It is up to the user to install the correct vendor-specific support.
14
+
15
+ Name: llama.cpp-cuda
16
+ Version: %( date "+%%Y%%m%%d" )
17
+ Release: 1%{?dist}
18
+ Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
19
+ License: MIT
20
+ Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
21
+ BuildRequires: coreutils make gcc-c++ git cuda-toolkit
22
+ Requires: cuda-toolkit
23
+ URL: https://github.com/ggerganov/llama.cpp
24
+
25
+ %define debug_package %{nil}
26
+ %define source_date_epoch_from_changelog 0
27
+
28
+ %description
29
+ CPU inference for Meta's Llama 2 models using default options.
30
+
31
+ %prep
32
+ %setup -n llama.cpp-master
33
+
34
+ %build
35
+ make -j GGML_CUDA=1
36
+
37
+ %install
38
+ mkdir -p %{buildroot}%{_bindir}/
39
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
40
+ cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
41
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
42
+
43
+ mkdir -p %{buildroot}/usr/lib/systemd/system
44
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
45
+ [Unit]
46
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
47
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
48
+
49
+ [Service]
50
+ Type=simple
51
+ EnvironmentFile=/etc/sysconfig/llama
52
+ ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
53
+ ExecReload=/bin/kill -s HUP $MAINPID
54
+ Restart=never
55
+
56
+ [Install]
57
+ WantedBy=default.target
58
+ EOF
59
+
60
+ mkdir -p %{buildroot}/etc/sysconfig
61
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
62
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
63
+ EOF
64
+
65
+ %clean
66
+ rm -rf %{buildroot}
67
+ rm -rf %{_builddir}/*
68
+
69
+ %files
70
+ %{_bindir}/llama-cuda-cli
71
+ %{_bindir}/llama-cuda-server
72
+ %{_bindir}/llama-cuda-simple
73
+ /usr/lib/systemd/system/llamacuda.service
74
+ %config /etc/sysconfig/llama
75
+
76
+ %pre
77
+
78
+ %post
79
+
80
+ %preun
81
+ %postun
82
+
83
+ %changelog
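A hedged sketch of building this spec locally on an RPM-based distro; it assumes NVIDIA's CUDA repository is enabled (for the cuda-toolkit BuildRequires) and that the GitHub master tarball is staged as Source0:

    sudo dnf install -y rpm-build rpmdevtools
    rpmdev-setuptree
    curl -L -o ~/rpmbuild/SOURCES/master.tar.gz \
        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
    rpmbuild -bb .devops/llama-cpp-cuda.srpm.spec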
llama.cpp/.devops/llama-cpp.srpm.spec ADDED
@@ -0,0 +1,85 @@
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - boeroboy@gmail.com
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # In the meantime, YYYYMMDD format will be used.
10
+ # 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
11
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
12
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
13
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
14
+ # It is up to the user to install the correct vendor-specific support.
15
+
16
+ Name: llama.cpp
17
+ Version: %( date "+%%Y%%m%%d" )
18
+ Release: 1%{?dist}
19
+ Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
20
+ License: MIT
21
+ Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
22
+ BuildRequires: coreutils make gcc-c++ git libstdc++-devel
23
+ Requires: libstdc++
24
+ URL: https://github.com/ggerganov/llama.cpp
25
+
26
+ %define debug_package %{nil}
27
+ %define source_date_epoch_from_changelog 0
28
+
29
+ %description
30
+ CPU inference for Meta's Llama 2 models using default options.
31
+ Models are not included in this package and must be downloaded separately.
32
+
33
+ %prep
34
+ %setup -n llama.cpp-master
35
+
36
+ %build
37
+ make -j
38
+
39
+ %install
40
+ mkdir -p %{buildroot}%{_bindir}/
41
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
42
+ cp -p llama-server %{buildroot}%{_bindir}/llama-server
43
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
44
+
45
+ mkdir -p %{buildroot}/usr/lib/systemd/system
46
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
47
+ [Unit]
48
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
49
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
50
+
51
+ [Service]
52
+ Type=simple
53
+ EnvironmentFile=/etc/sysconfig/llama
54
+ ExecStart=/usr/bin/llama-server $LLAMA_ARGS
55
+ ExecReload=/bin/kill -s HUP $MAINPID
56
+ Restart=never
57
+
58
+ [Install]
59
+ WantedBy=default.target
60
+ EOF
61
+
62
+ mkdir -p %{buildroot}/etc/sysconfig
63
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
64
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
65
+ EOF
66
+
67
+ %clean
68
+ rm -rf %{buildroot}
69
+ rm -rf %{_builddir}/*
70
+
71
+ %files
72
+ %{_bindir}/llama-cli
73
+ %{_bindir}/llama-server
74
+ %{_bindir}/llama-simple
75
+ /usr/lib/systemd/system/llama.service
76
+ %config /etc/sysconfig/llama
77
+
78
+ %pre
79
+
80
+ %post
81
+
82
+ %preun
83
+ %postun
84
+
85
+ %changelog
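After installing the resulting RPM, the bundled unit reads LLAMA_ARGS from /etc/sysconfig/llama. A usage sketch (model path and extra flags are placeholders):

    echo 'LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin --port 8080"' | sudo tee /etc/sysconfig/llama
    sudo systemctl daemon-reload
    sudo systemctl enable --now llama.service
    systemctl status llama.service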
llama.cpp/.devops/llama-server-cuda.Dockerfile ADDED
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG CUDA_VERSION=12.6.0
4
+ # Target the CUDA build image
5
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+ # Target the CUDA runtime image
7
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10
+
11
+ # CUDA architecture to build for (defaults to all supported archs)
12
+ ARG CUDA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
16
+
17
+ WORKDIR /app
18
+
19
+ COPY . .
20
+
21
+ # Use the default CUDA archs if not specified
22
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
23
+ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
24
+ fi && \
25
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
26
+ cmake --build build --config Release --target llama-server -j$(nproc) && \
27
+ mkdir -p /app/lib && \
28
+ find build -name "*.so" -exec cp {} /app/lib \;
29
+
30
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
31
+
32
+ RUN apt-get update && \
33
+ apt-get install -y libcurl4-openssl-dev libgomp1 curl
34
+
35
+ COPY --from=build /app/lib/ /
36
+ COPY --from=build /app/build/bin/llama-server /llama-server
37
+
38
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
39
+ ENV LLAMA_ARG_HOST=0.0.0.0
40
+
41
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
42
+
43
+ ENTRYPOINT [ "/llama-server" ]
llama.cpp/.devops/llama-server-intel.Dockerfile ADDED
1
+ ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04
2
+
3
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
4
+
5
+ ARG GGML_SYCL_F16=OFF
6
+ RUN apt-get update && \
7
+ apt-get install -y git libcurl4-openssl-dev
8
+
9
+ WORKDIR /app
10
+
11
+ COPY . .
12
+
13
+ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
14
+ echo "GGML_SYCL_F16 is set" && \
15
+ export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
16
+ fi && \
17
+ echo "Building with dynamic libs" && \
18
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
19
+ cmake --build build --config Release --target llama-server
20
+
21
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
22
+
23
+ RUN apt-get update && \
24
+ apt-get install -y libcurl4-openssl-dev curl
25
+
26
+ COPY --from=build /app/build/bin/llama-server /llama-server
27
+
28
+ ENV LC_ALL=C.utf8
29
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
30
+ ENV LLAMA_ARG_HOST=0.0.0.0
31
+
32
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
33
+
34
+ ENTRYPOINT [ "/llama-server" ]
llama.cpp/.devops/llama-server-musa.Dockerfile ADDED
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG MUSA_VERSION=rc3.1.0
4
+ # Target the MUSA build image
5
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+ # Target the MUSA runtime image
7
+ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
10
+
11
+ # MUSA architecture to build for (defaults to all supported archs)
12
+ ARG MUSA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
16
+
17
+ WORKDIR /app
18
+
19
+ COPY . .
20
+
21
+ # Use the default MUSA archs if not specified
22
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
23
+ export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
24
+ fi && \
25
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
26
+ cmake --build build --config Release --target llama-server -j$(nproc) && \
27
+ mkdir -p /app/lib && \
28
+ find build -name "*.so" -exec cp {} /app/lib \;
29
+
30
+ FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
31
+
32
+ RUN apt-get update && \
33
+ apt-get install -y libcurl4-openssl-dev libgomp1 curl
34
+
35
+ COPY --from=build /app/lib/ /
36
+ COPY --from=build /app/build/bin/llama-server /llama-server
37
+
38
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
39
+ ENV LLAMA_ARG_HOST=0.0.0.0
40
+
41
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
42
+
43
+ ENTRYPOINT [ "/llama-server" ]
llama.cpp/.devops/llama-server-rocm.Dockerfile ADDED
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ # This needs to generally match the container host's environment.
4
+ ARG ROCM_VERSION=5.6
5
+
6
+ # Target the ROCm build image
7
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
8
+
9
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
10
+
11
+ # Unless otherwise specified, we make a fat build.
12
+ # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
13
+ # This is mostly tied to rocBLAS supported archs.
14
+ ARG ROCM_DOCKER_ARCH="\
15
+ gfx803 \
16
+ gfx900 \
17
+ gfx906 \
18
+ gfx908 \
19
+ gfx90a \
20
+ gfx1010 \
21
+ gfx1030 \
22
+ gfx1100 \
23
+ gfx1101 \
24
+ gfx1102"
25
+
26
+ COPY requirements.txt requirements.txt
27
+ COPY requirements requirements
28
+
29
+ RUN pip install --upgrade pip setuptools wheel \
30
+ && pip install -r requirements.txt
31
+
32
+ WORKDIR /app
33
+
34
+ COPY . .
35
+
36
+ # Set AMDGPU targets
37
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
38
+ # Enable ROCm
39
+ ENV GGML_HIPBLAS=1
40
+ ENV CC=/opt/rocm/llvm/bin/clang
41
+ ENV CXX=/opt/rocm/llvm/bin/clang++
42
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
43
+ ENV LLAMA_ARG_HOST=0.0.0.0
44
+
45
+ # Enable cURL
46
+ ENV LLAMA_CURL=1
47
+ RUN apt-get update && \
48
+ apt-get install -y libcurl4-openssl-dev curl
49
+
50
+ RUN make -j$(nproc) llama-server
51
+
52
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
53
+
54
+ ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/llama-server-vulkan.Dockerfile ADDED
1
+ ARG UBUNTU_VERSION=jammy
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ # Install build tools
6
+ RUN apt update && apt install -y git build-essential cmake wget
7
+
8
+ # Install Vulkan SDK and cURL
9
+ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
10
+ wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
11
+ apt update -y && \
12
+ apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
13
+
14
+ # Build it
15
+ WORKDIR /app
16
+ COPY . .
17
+ RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
18
+ cmake --build build --config Release --target llama-server
19
+
20
+ # Clean up
21
+ WORKDIR /
22
+ RUN cp /app/build/bin/llama-server /llama-server && \
23
+ rm -rf /app
24
+
25
+ ENV LC_ALL=C.utf8
26
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
27
+ ENV LLAMA_ARG_HOST=0.0.0.0
28
+
29
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
30
+
31
+ ENTRYPOINT [ "/llama-server" ]
llama.cpp/.devops/llama-server.Dockerfile ADDED
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ RUN apt-get update && \
6
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
7
+
8
+ WORKDIR /app
9
+
10
+ COPY . .
11
+
12
+
13
+ RUN \
14
+ # Build multiple versions of the CPU backend
15
+ scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
16
+ scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
17
+ scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
18
+ scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
19
+ # Build llama-server
20
+ cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
21
+ cmake --build build --target llama-server -j $(nproc) && \
22
+ # Copy the built libraries to /app/lib
23
+ mkdir -p /app/lib && \
24
+ mv libggml-cpu* /app/lib/ && \
25
+ find build -name "*.so" -exec cp {} /app/lib/ \;
26
+
27
+ FROM ubuntu:$UBUNTU_VERSION AS runtime
28
+
29
+ RUN apt-get update && \
30
+ apt-get install -y libcurl4-openssl-dev libgomp1 curl
31
+
32
+ COPY --from=build /app/build/bin/llama-server /llama-server
33
+ COPY --from=build /app/lib/ /
34
+
35
+ ENV LC_ALL=C.utf8
36
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
37
+ ENV LLAMA_ARG_HOST=0.0.0.0
38
+
39
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
40
+
41
+ ENTRYPOINT [ "/llama-server" ]
llama.cpp/.devops/nix/apps.nix ADDED
1
+ {
2
+ perSystem =
3
+ { config, lib, ... }:
4
+ {
5
+ apps =
6
+ let
7
+ inherit (config.packages) default;
8
+ binaries = [
9
+ "llama-cli"
10
+ "llama-embedding"
11
+ "llama-server"
12
+ "llama-quantize"
13
+ ];
14
+ mkApp = name: {
15
+ type = "app";
16
+ program = "${default}/bin/${name}";
17
+ };
18
+ in
19
+ lib.genAttrs binaries mkApp;
20
+ };
21
+ }
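apps.nix exposes llama-cli, llama-embedding, llama-server, and llama-quantize as flake apps built from the default package. A hedged usage sketch from a local checkout (model paths are placeholders):

    nix run .#llama-cli -- -m /path/to/model.gguf -p "Hello" -n 64
    nix run .#llama-server -- -m /path/to/model.gguf --port 8080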
llama.cpp/.devops/nix/devshells.nix ADDED
1
+ { inputs, ... }:
2
+
3
+ {
4
+ perSystem =
5
+ {
6
+ config,
7
+ lib,
8
+ system,
9
+ ...
10
+ }:
11
+ {
12
+ devShells =
13
+ let
14
+ pkgs = import inputs.nixpkgs { inherit system; };
15
+ stdenv = pkgs.stdenv;
16
+ scripts = config.packages.python-scripts;
17
+ in
18
+ lib.pipe (config.packages) [
19
+ (lib.concatMapAttrs (
20
+ name: package: {
21
+ ${name} = pkgs.mkShell {
22
+ name = "${name}";
23
+ inputsFrom = [ package ];
24
+ shellHook = ''
25
+ echo "Entering ${name} devShell"
26
+ '';
27
+ };
28
+ "${name}-extra" =
29
+ if (name == "python-scripts") then
30
+ null
31
+ else
32
+ pkgs.mkShell {
33
+ name = "${name}-extra";
34
+ inputsFrom = [
35
+ package
36
+ scripts
37
+ ];
38
+ # Extra packages that *may* be used by some scripts
39
+ packages = [
40
+ pkgs.python3Packages.tiktoken
41
+ ];
42
+ shellHook = ''
43
+ echo "Entering ${name} devShell"
44
+ addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
45
+ '';
46
+ };
47
+ }
48
+ ))
49
+ (lib.filterAttrs (name: value: value != null))
50
+ ];
51
+ };
52
+ }
llama.cpp/.devops/nix/docker.nix ADDED
1
+ {
2
+ lib,
3
+ dockerTools,
4
+ buildEnv,
5
+ llama-cpp,
6
+ interactive ? true,
7
+ coreutils,
8
+ }:
9
+
10
+ # A tar that can be fed into `docker load`:
11
+ #
12
+ # $ nix build .#llamaPackages.docker
13
+ # $ docker load < result
14
+
15
+ # For details and variations cf.
16
+ # - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
17
+ # - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
18
+ # - https://nixery.dev/
19
+
20
+ # Approximate (compressed) sizes, at the time of writing, are:
21
+ #
22
+ # .#llamaPackages.docker: 125M;
23
+ # .#llamaPackagesCuda.docker: 537M;
24
+ # .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
25
+
26
+ dockerTools.buildLayeredImage {
27
+ name = llama-cpp.pname;
28
+ tag = "latest";
29
+
30
+ contents =
31
+ [ llama-cpp ]
32
+ ++ lib.optionals interactive [
33
+ coreutils
34
+ dockerTools.binSh
35
+ dockerTools.caCertificates
36
+ ];
37
+ }
llama.cpp/.devops/nix/jetson-support.nix ADDED
1
+ { inputs, ... }:
2
+ {
3
+ perSystem =
4
+ {
5
+ config,
6
+ system,
7
+ lib,
8
+ pkgsCuda,
9
+ ...
10
+ }:
11
+ {
12
+ legacyPackages =
13
+ let
14
+ caps.llamaPackagesXavier = "7.2";
15
+ caps.llamaPackagesOrin = "8.7";
16
+ caps.llamaPackagesTX2 = "6.2";
17
+ caps.llamaPackagesNano = "5.3";
18
+
19
+ pkgsFor =
20
+ cap:
21
+ import inputs.nixpkgs {
22
+ inherit system;
23
+ config = {
24
+ cudaSupport = true;
25
+ cudaCapabilities = [ cap ];
26
+ cudaEnableForwardCompat = false;
27
+ inherit (pkgsCuda.config) allowUnfreePredicate;
28
+ };
29
+ };
30
+ in
31
+ builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
32
+
33
+ packages = lib.optionalAttrs (system == "aarch64-linux") {
34
+ jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
35
+ jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
36
+ jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
37
+ };
38
+ };
39
+ }
llama.cpp/.devops/nix/nixpkgs-instances.nix ADDED
1
+ { inputs, ... }:
2
+ {
3
+ # The _module.args definitions are passed on to modules as arguments. E.g.
4
+ # the module `{ pkgs ... }: { /* config */ }` implicitly uses
5
+ # `_module.args.pkgs` (defined in this case by flake-parts).
6
+ perSystem =
7
+ { system, ... }:
8
+ {
9
+ _module.args = {
10
+ # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
11
+ # again, the below creates several nixpkgs instances which the
12
+ # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
13
+ #
14
+ # This is currently "slow" and "expensive", on a certain scale.
15
+ # This also isn't "right" in that this hinders dependency injection at
16
+ # the level of flake inputs. This might get removed in the foreseeable
17
+ # future.
18
+ #
19
+ # Note that you can use these expressions without Nix
20
+ # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
21
+
22
+ pkgsCuda = import inputs.nixpkgs {
23
+ inherit system;
24
+ # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
25
+ # and ucx are built with CUDA support)
26
+ config.cudaSupport = true;
27
+ config.allowUnfreePredicate =
28
+ p:
29
+ builtins.all (
30
+ license:
31
+ license.free
32
+ || builtins.elem license.shortName [
33
+ "CUDA EULA"
34
+ "cuDNN EULA"
35
+ ]
36
+ ) (p.meta.licenses or [ p.meta.license ]);
37
+ };
38
+ # Ensure dependencies use ROCm consistently
39
+ pkgsRocm = import inputs.nixpkgs {
40
+ inherit system;
41
+ config.rocmSupport = true;
42
+ };
43
+ };
44
+ };
45
+ }
llama.cpp/.devops/nix/package-gguf-py.nix ADDED
1
+ {
2
+ lib,
3
+ llamaVersion,
4
+ numpy,
5
+ tqdm,
6
+ sentencepiece,
7
+ pyyaml,
8
+ poetry-core,
9
+ buildPythonPackage,
10
+ pytestCheckHook,
11
+ }:
12
+
13
+ buildPythonPackage {
14
+ pname = "gguf";
15
+ version = llamaVersion;
16
+ pyproject = true;
17
+ nativeBuildInputs = [ poetry-core ];
18
+ propagatedBuildInputs = [
19
+ numpy
20
+ tqdm
21
+ sentencepiece
22
+ pyyaml
23
+ ];
24
+ src = lib.cleanSource ../../gguf-py;
25
+ pythonImportsCheck = [
26
+ "numpy"
27
+ "gguf"
28
+ ];
29
+ nativeCheckInputs = [ pytestCheckHook ];
30
+ doCheck = true;
31
+ meta = with lib; {
32
+ description = "Python package for writing binary files in the GGUF format";
33
+ license = licenses.mit;
34
+ maintainers = [ maintainers.ditsuke ];
35
+ };
36
+ }
llama.cpp/.devops/nix/package.nix ADDED
1
+ {
2
+ lib,
3
+ glibc,
4
+ config,
5
+ stdenv,
6
+ runCommand,
7
+ cmake,
8
+ ninja,
9
+ pkg-config,
10
+ git,
11
+ mpi,
12
+ blas,
13
+ cudaPackages,
14
+ autoAddDriverRunpath,
15
+ darwin,
16
+ rocmPackages,
17
+ vulkan-headers,
18
+ vulkan-loader,
19
+ curl,
20
+ shaderc,
21
+ useBlas ?
22
+ builtins.all (x: !x) [
23
+ useCuda
24
+ useMetalKit
25
+ useRocm
26
+ useVulkan
27
+ ]
28
+ && blas.meta.available,
29
+ useCuda ? config.cudaSupport,
30
+ useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
31
+ # Increases the runtime closure size by ~700M
32
+ useMpi ? false,
33
+ useRocm ? config.rocmSupport,
34
+ enableCurl ? true,
35
+ useVulkan ? false,
36
+ llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
37
+
38
+ # It's necessary to consistently use backendStdenv when building with CUDA support,
39
+ # otherwise we get libstdc++ errors downstream.
40
+ effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
41
+ enableStatic ? effectiveStdenv.hostPlatform.isStatic,
42
+ precompileMetalShaders ? false,
43
+ }:
44
+
45
+ let
46
+ inherit (lib)
47
+ cmakeBool
48
+ cmakeFeature
49
+ optionals
50
+ strings
51
+ ;
52
+
53
+ stdenv = throw "Use effectiveStdenv instead";
54
+
55
+ suffices =
56
+ lib.optionals useBlas [ "BLAS" ]
57
+ ++ lib.optionals useCuda [ "CUDA" ]
58
+ ++ lib.optionals useMetalKit [ "MetalKit" ]
59
+ ++ lib.optionals useMpi [ "MPI" ]
60
+ ++ lib.optionals useRocm [ "ROCm" ]
61
+ ++ lib.optionals useVulkan [ "Vulkan" ];
62
+
63
+ pnameSuffix =
64
+ strings.optionalString (suffices != [ ])
65
+ "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
66
+ descriptionSuffix = strings.optionalString (
67
+ suffices != [ ]
68
+ ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
69
+
70
+ xcrunHost = runCommand "xcrunHost" { } ''
71
+ mkdir -p $out/bin
72
+ ln -s /usr/bin/xcrun $out/bin
73
+ '';
74
+
75
+ # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
76
+ # separately
77
+ darwinBuildInputs =
78
+ with darwin.apple_sdk.frameworks;
79
+ [
80
+ Accelerate
81
+ CoreVideo
82
+ CoreGraphics
83
+ ]
84
+ ++ optionals useMetalKit [ MetalKit ];
85
+
86
+ cudaBuildInputs = with cudaPackages; [
87
+ cuda_cudart
88
+ cuda_cccl # <nv/target>
89
+ libcublas
90
+ ];
91
+
92
+ rocmBuildInputs = with rocmPackages; [
93
+ clr
94
+ hipblas
95
+ rocblas
96
+ ];
97
+
98
+ vulkanBuildInputs = [
99
+ vulkan-headers
100
+ vulkan-loader
101
+ shaderc
102
+ ];
103
+ in
104
+
105
+ effectiveStdenv.mkDerivation (finalAttrs: {
106
+ pname = "llama-cpp${pnameSuffix}";
107
+ version = llamaVersion;
108
+
109
+ # Note: none of the files discarded here are visible in the sandbox or
110
+ # affect the output hash. This also means they can be modified without
111
+ # triggering a rebuild.
112
+ src = lib.cleanSourceWith {
113
+ filter =
114
+ name: type:
115
+ let
116
+ noneOf = builtins.all (x: !x);
117
+ baseName = baseNameOf name;
118
+ in
119
+ noneOf [
120
+ (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
121
+ (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
122
+ (lib.hasPrefix "." baseName) # Skip hidden files and directories
123
+ (baseName == "flake.lock")
124
+ ];
125
+ src = lib.cleanSource ../../.;
126
+ };
127
+
128
+ postPatch = ''
129
+ substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
130
+ --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
131
+ substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
132
+ --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
133
+ '';
134
+
135
+ # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
136
+ # `default.metallib` may be compiled with Metal compiler from XCode
137
+ # and we need to escape sandbox on MacOS to access Metal compiler.
138
+ # `xcrun` is used find the path of the Metal compiler, which is varible
139
+ # and not on $PATH
140
+ # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
141
+ __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
142
+
143
+ nativeBuildInputs =
144
+ [
145
+ cmake
146
+ ninja
147
+ pkg-config
148
+ git
149
+ ]
150
+ ++ optionals useCuda [
151
+ cudaPackages.cuda_nvcc
152
+
153
+ autoAddDriverRunpath
154
+ ]
155
+ ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
156
+ ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
157
+
158
+ buildInputs =
159
+ optionals effectiveStdenv.isDarwin darwinBuildInputs
160
+ ++ optionals useCuda cudaBuildInputs
161
+ ++ optionals useMpi [ mpi ]
162
+ ++ optionals useRocm rocmBuildInputs
163
+ ++ optionals useBlas [ blas ]
164
+ ++ optionals useVulkan vulkanBuildInputs
165
+ ++ optionals enableCurl [ curl ];
166
+
167
+ cmakeFlags =
168
+ [
169
+ (cmakeBool "LLAMA_BUILD_SERVER" true)
170
+ (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
171
+ (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
172
+ (cmakeBool "LLAMA_CURL" enableCurl)
173
+ (cmakeBool "GGML_NATIVE" false)
174
+ (cmakeBool "GGML_BLAS" useBlas)
175
+ (cmakeBool "GGML_CUDA" useCuda)
176
+ (cmakeBool "GGML_HIP" useRocm)
177
+ (cmakeBool "GGML_METAL" useMetalKit)
178
+ (cmakeBool "GGML_VULKAN" useVulkan)
179
+ (cmakeBool "GGML_STATIC" enableStatic)
180
+ ]
181
+ ++ optionals useCuda [
182
+ (
183
+ with cudaPackages.flags;
184
+ cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
185
+ builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
186
+ )
187
+ )
188
+ ]
189
+ ++ optionals useRocm [
190
+ (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
191
+ (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
192
+ ]
193
+ ++ optionals useMetalKit [
194
+ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
195
+ (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
196
+ ];
197
+
198
+ # Environment variables needed for ROCm
199
+ env = optionals useRocm {
200
+ ROCM_PATH = "${rocmPackages.clr}";
201
+ HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
202
+ };
203
+
204
+ # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
205
+ # if they haven't been added yet.
206
+ postInstall = ''
207
+ mkdir -p $out/include
208
+ cp $src/include/llama.h $out/include/
209
+ '';
210
+
211
+ meta = {
212
+ # Configurations we don't want even the CI to evaluate. Results in the
213
+ # "unsupported platform" messages. This is mostly a no-op, because
214
+ # cudaPackages would've refused to evaluate anyway.
215
+ badPlatforms = optionals useCuda lib.platforms.darwin;
216
+
217
+ # Configurations that are known to result in build failures. Can be
218
+ # overridden by importing Nixpkgs with `allowBroken = true`.
219
+ broken = (useMetalKit && !effectiveStdenv.isDarwin);
220
+
221
+ description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
222
+ homepage = "https://github.com/ggerganov/llama.cpp/";
223
+ license = lib.licenses.mit;
224
+
225
+ # Accommodates `nix run` and `lib.getExe`
226
+ mainProgram = "llama-cli";
227
+
228
+ # These people might respond, on the best effort basis, if you ping them
229
+ # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
230
+ # Consider adding yourself to this list if you want to ensure this flake
231
+ # stays maintained and you're willing to invest your time. Do not add
232
+ # other people without their consent. Consider removing people after
233
+ # they've been unreachable for long periods of time.
234
+
235
+ # Note that lib.maintainers is defined in Nixpkgs, but you may just add
236
+ # an attrset following the same format as in
237
+ # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
238
+ maintainers = with lib.maintainers; [
239
+ philiptaron
240
+ SomeoneSerge
241
+ ];
242
+
243
+ # Extend `badPlatforms` instead
244
+ platforms = lib.platforms.all;
245
+ };
246
+ })
llama.cpp/.devops/nix/python-scripts.nix ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ lib,
3
+ stdenv,
4
+ buildPythonPackage,
5
+ poetry-core,
6
+ mkShell,
7
+ python3Packages,
8
+ gguf-py,
9
+ }@inputs:
10
+
11
+ let
12
+ llama-python-deps = with python3Packages; [
13
+ numpy
14
+ sentencepiece
15
+ transformers
16
+ protobuf
17
+ torchWithoutCuda
18
+ gguf-py
19
+ tqdm
20
+
21
+ # for scripts/compare-llama-bench.py
22
+ gitpython
23
+ tabulate
24
+
25
+ # for examples/pydantic-models-to-grammar-examples.py
26
+ docstring-parser
27
+ pydantic
28
+
29
+ ];
30
+
31
+ llama-python-test-deps = with python3Packages; [
32
+ # Server bench
33
+ matplotlib
34
+
35
+ # server tests
36
+ openai
37
+ pytest
38
+ prometheus-client
39
+ ];
40
+ in
41
+
42
+ buildPythonPackage ({
43
+ pname = "llama-scripts";
44
+ version = "0.0.0";
45
+ pyproject = true;
46
+
47
+ # NOTE: The files filtered out here are not visible in the build sandbox, nor
48
+ # do they affect the output hash. They can be modified without triggering a rebuild.
49
+ src = lib.cleanSourceWith {
50
+ filter =
51
+ name: type:
52
+ let
53
+ any = builtins.any (x: x);
54
+ baseName = builtins.baseNameOf name;
55
+ in
56
+ any [
57
+ (lib.hasSuffix ".py" name)
58
+ (baseName == "README.md")
59
+ (baseName == "pyproject.toml")
60
+ ];
61
+ src = lib.cleanSource ../../.;
62
+ };
63
+ nativeBuildInputs = [ poetry-core ];
64
+ nativeCheckInputs = llama-python-test-deps;
65
+ dependencies = llama-python-deps;
66
+ })
llama.cpp/.devops/nix/scope.nix ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ lib,
3
+ newScope,
4
+ python3,
5
+ llamaVersion ? "0.0.0",
6
+ }:
7
+
8
+ let
9
+ pythonPackages = python3.pkgs;
10
+ buildPythonPackage = pythonPackages.buildPythonPackage;
11
+ numpy = pythonPackages.numpy;
12
+ tqdm = pythonPackages.tqdm;
13
+ sentencepiece = pythonPackages.sentencepiece;
14
+ pyyaml = pythonPackages.pyyaml;
15
+ poetry-core = pythonPackages.poetry-core;
16
+ pytestCheckHook = pythonPackages.pytestCheckHook;
17
+ in
18
+
19
+ # We're using `makeScope` instead of just writing out an attrset
20
+ # because it allows users to apply overlays later using `overrideScope'`.
21
+ # Cf. https://noogle.dev/f/lib/makeScope
22
+
23
+ lib.makeScope newScope (self: {
24
+ inherit llamaVersion;
25
+ gguf-py = self.callPackage ./package-gguf-py.nix {
26
+ inherit
27
+ buildPythonPackage
28
+ numpy
29
+ tqdm
30
+ sentencepiece
31
+ poetry-core
32
+ pyyaml
33
+ pytestCheckHook
34
+ ;
35
+ };
36
+ python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
37
+ llama-cpp = self.callPackage ./package.nix { };
38
+ docker = self.callPackage ./docker.nix { };
39
+ docker-min = self.callPackage ./docker.nix { interactive = false; };
40
+ sif = self.callPackage ./sif.nix { };
41
+ })
llama.cpp/.devops/nix/sif.nix ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ lib,
3
+ singularity-tools,
4
+ llama-cpp,
5
+ bashInteractive,
6
+ interactive ? false,
7
+ }:
8
+
9
+ let
10
+ optionalInt = cond: x: if cond then x else 0;
11
+ in
12
+ singularity-tools.buildImage rec {
13
+ inherit (llama-cpp) name;
14
+ contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
15
+
16
+ # These are excessive (but safe) for most variants. Building singularity
17
+ # images requires superuser privileges, so we build them inside a VM in a
18
+ # writable image of pre-determined size.
19
+ #
20
+ # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
21
+ #
22
+ # Expected image sizes:
23
+ # - cpu/blas: 150M,
24
+ # - cuda, all gencodes: 560M,
25
+ diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
26
+ memSize = diskSize;
27
+ }
llama.cpp/.devops/tools.sh ADDED
@@ -0,0 +1,41 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ # Read the first argument into a variable
5
+ arg1="$1"
6
+
7
+ # Shift the arguments to remove the first one
8
+ shift
9
+
10
+ if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
11
+ python3 ./convert_hf_to_gguf.py "$@"
12
+ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
13
+ ./llama-quantize "$@"
14
+ elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
15
+ ./llama-cli "$@"
16
+ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
17
+ echo "Converting PTH to GGML..."
18
+ for i in `ls $1/$2/ggml-model-f16.bin*`; do
19
+ if [ -f "${i/f16/q4_0}" ]; then
20
+ echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
21
+ else
22
+ echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
23
+ ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
24
+ fi
25
+ done
26
+ elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
27
+ ./llama-server "$@"
28
+ else
29
+ echo "Unknown command: $arg1"
30
+ echo "Available commands: "
31
+ echo " --run (-r): Run a model previously converted into ggml"
32
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
33
+ echo " --convert (-c): Convert a llama model into ggml"
34
+ echo " ex: --outtype f16 \"/models/7B/\" "
35
+ echo " --quantize (-q): Optimize a ggml model with the quantization process"
36
+ echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
37
+ echo " --all-in-one (-a): Execute --convert & --quantize"
38
+ echo " ex: \"/models/\" 7B"
39
+ echo " --server (-s): Run a model on the server"
40
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
41
+ fi
llama.cpp/.dockerignore ADDED
@@ -0,0 +1,20 @@
1
+ *.o
2
+ *.a
3
+ .cache/
4
+ # Do not ignore .git directory, otherwise the reported build number will always be 0
5
+ .github/
6
+ .gitignore
7
+ .vs/
8
+ .vscode/
9
+ .DS_Store
10
+
11
+ build*/
12
+
13
+ models/*
14
+
15
+ /llama-cli
16
+ /llama-quantize
17
+
18
+ arm_neon.h
19
+ compile_commands.json
20
+ Dockerfile
llama.cpp/.ecrc ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
3
+ "Disable": {
4
+ "IndentSize": true
5
+ }
6
+ }
llama.cpp/.editorconfig ADDED
@@ -0,0 +1,42 @@
1
+ # https://EditorConfig.org
2
+
3
+ # Top-most EditorConfig file
4
+ root = true
5
+
6
+ # Unix-style newlines with a newline ending every file, utf-8 charset
7
+ [*]
8
+ end_of_line = lf
9
+ insert_final_newline = true
10
+ trim_trailing_whitespace = true
11
+ charset = utf-8
12
+ indent_style = space
13
+ indent_size = 4
14
+
15
+ [Makefile]
16
+ indent_style = tab
17
+
18
+ [scripts/*.mk]
19
+ indent_style = tab
20
+
21
+ [prompts/*.txt]
22
+ insert_final_newline = unset
23
+
24
+ [examples/server/public/*]
25
+ indent_size = 2
26
+
27
+ [examples/server/public/deps_*]
28
+ trim_trailing_whitespace = unset
29
+ indent_style = unset
30
+ indent_size = unset
31
+
32
+ [examples/server/deps_*]
33
+ trim_trailing_whitespace = unset
34
+ indent_style = unset
35
+ indent_size = unset
36
+
37
+ [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
38
+ indent_style = tab
39
+
40
+ [examples/cvector-generator/*.txt]
41
+ trim_trailing_whitespace = unset
42
+ insert_final_newline = unset
llama.cpp/.flake8 ADDED
@@ -0,0 +1,17 @@
1
+ [flake8]
2
+ max-line-length = 125
3
+ ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
4
+ exclude =
5
+ # Do not traverse examples
6
+ examples,
7
+ # Do not include package initializers
8
+ __init__.py,
9
+ # No need to traverse our git directory
10
+ .git,
11
+ # There's no value in checking cache directories
12
+ __pycache__,
13
+ # No need to include the build path
14
+ build,
15
+ # This contains builds that we don't want to check
16
+ dist # This is generated with `python build .` for package releases
17
+ # max-complexity = 10
llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml ADDED
@@ -0,0 +1,77 @@
1
+ name: Bug (compilation)
2
+ description: Something goes wrong when trying to compile llama.cpp.
3
+ title: "Compile bug: "
4
+ labels: ["bug-unconfirmed", "compilation"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for bug reports where the compilation of llama.cpp fails.
11
+ Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
12
+ If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
13
+ by clearing `~/.cache/ccache` (on Linux).
14
+ - type: textarea
15
+ id: commit
16
+ attributes:
17
+ label: Git commit
18
+ description: Which commit are you trying to compile?
19
+ placeholder: |
20
+ $git rev-parse HEAD
21
+ 84a07a17b1b08cf2b9747c633a2372782848a27f
22
+ validations:
23
+ required: true
24
+ - type: dropdown
25
+ id: operating-system
26
+ attributes:
27
+ label: Operating systems
28
+ description: Which operating systems do you know to be affected?
29
+ multiple: true
30
+ options:
31
+ - Linux
32
+ - Mac
33
+ - Windows
34
+ - BSD
35
+ - Other? (Please let us know in description)
36
+ validations:
37
+ required: true
38
+ - type: dropdown
39
+ id: backends
40
+ attributes:
41
+ label: GGML backends
42
+ description: Which GGML backends do you know to be affected?
43
+ options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
44
+ multiple: true
45
+ validations:
46
+ required: true
47
+ - type: textarea
48
+ id: info
49
+ attributes:
50
+ label: Problem description & steps to reproduce
51
+ description: >
52
+ Please give us a summary of the problem and tell us how to reproduce it.
53
+ If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
54
+ placeholder: >
55
+ I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
56
+ Here are the exact commands that I used: ...
57
+ validations:
58
+ required: true
59
+ - type: textarea
60
+ id: first_bad_commit
61
+ attributes:
62
+ label: First Bad Commit
63
+ description: >
64
+ If the bug was not present on an earlier version: when did it start appearing?
65
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
66
+ validations:
67
+ required: false
68
+ - type: textarea
69
+ id: logs
70
+ attributes:
71
+ label: Relevant log output
72
+ description: >
73
+ Please copy and paste any relevant log output, including the command that you entered and any generated text.
74
+ This will be automatically formatted into code, so no need for backticks.
75
+ render: shell
76
+ validations:
77
+ required: true
llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml ADDED
@@ -0,0 +1,101 @@
1
+ name: Bug (model use)
2
+ description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
3
+ title: "Eval bug: "
4
+ labels: ["bug-unconfirmed", "model evaluation"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for bug reports where the model evaluation results
11
+ (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
12
+ If you encountered the issue while using an external UI (e.g. ollama),
13
+ please reproduce your issue using one of the examples/binaries in this repository.
14
+ The `llama-cli` binary can be used for simple and reproducible model inference.
15
+ - type: textarea
16
+ id: version
17
+ attributes:
18
+ label: Name and Version
19
+ description: Which version of our software are you running? (use `--version` to get a version string)
20
+ placeholder: |
21
+ $./llama-cli --version
22
+ version: 2999 (42b4109e)
23
+ built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
24
+ validations:
25
+ required: true
26
+ - type: dropdown
27
+ id: operating-system
28
+ attributes:
29
+ label: Operating systems
30
+ description: Which operating systems do you know to be affected?
31
+ multiple: true
32
+ options:
33
+ - Linux
34
+ - Mac
35
+ - Windows
36
+ - BSD
37
+ - Other? (Please let us know in description)
38
+ validations:
39
+ required: true
40
+ - type: dropdown
41
+ id: backends
42
+ attributes:
43
+ label: GGML backends
44
+ description: Which GGML backends do you know to be affected?
45
+ options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
46
+ multiple: true
47
+ validations:
48
+ required: true
49
+ - type: textarea
50
+ id: hardware
51
+ attributes:
52
+ label: Hardware
53
+ description: Which CPUs/GPUs are you using?
54
+ placeholder: >
55
+ e.g. Ryzen 5950X + 2x RTX 4090
56
+ validations:
57
+ required: true
58
+ - type: textarea
59
+ id: model
60
+ attributes:
61
+ label: Models
62
+ description: >
63
+ Which model(s) at which quantization were you using when encountering the bug?
64
+ If you downloaded a GGUF file off of Huggingface, please provide a link.
65
+ placeholder: >
66
+ e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
67
+ validations:
68
+ required: false
69
+ - type: textarea
70
+ id: info
71
+ attributes:
72
+ label: Problem description & steps to reproduce
73
+ description: >
74
+ Please give us a summary of the problem and tell us how to reproduce it.
75
+ If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
76
+ that information would be very much appreciated by us.
77
+ placeholder: >
78
+ e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
79
+ When I use -ngl 0 it works correctly.
80
+ Here are the exact commands that I used: ...
81
+ validations:
82
+ required: true
83
+ - type: textarea
84
+ id: first_bad_commit
85
+ attributes:
86
+ label: First Bad Commit
87
+ description: >
88
+ If the bug was not present on an earlier version: when did it start appearing?
89
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
90
+ validations:
91
+ required: false
92
+ - type: textarea
93
+ id: logs
94
+ attributes:
95
+ label: Relevant log output
96
+ description: >
97
+ Please copy and paste any relevant log output, including the command that you entered and any generated text.
98
+ This will be automatically formatted into code, so no need for backticks.
99
+ render: shell
100
+ validations:
101
+ required: true
llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml ADDED
@@ -0,0 +1,81 @@
1
+ name: Bug (misc.)
2
+ description: Something is not working the way it should (and it's not covered by any of the above cases).
3
+ title: "Misc. bug: "
4
+ labels: ["bug-unconfirmed"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for miscellaneous bugs that don't fit into any other category.
11
+ If you encountered the issue while using an external UI (e.g. ollama),
12
+ please reproduce your issue using one of the examples/binaries in this repository.
13
+ - type: textarea
14
+ id: version
15
+ attributes:
16
+ label: Name and Version
17
+ description: Which version of our software is affected? (You can use `--version` to get a version string.)
18
+ placeholder: |
19
+ $./llama-cli --version
20
+ version: 2999 (42b4109e)
21
+ built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
22
+ validations:
23
+ required: true
24
+ - type: dropdown
25
+ id: operating-system
26
+ attributes:
27
+ label: Operating systems
28
+ description: Which operating systems do you know to be affected?
29
+ multiple: true
30
+ options:
31
+ - Linux
32
+ - Mac
33
+ - Windows
34
+ - BSD
35
+ - Other? (Please let us know in description)
36
+ validations:
37
+ required: false
38
+ - type: dropdown
39
+ id: module
40
+ attributes:
41
+ label: Which llama.cpp modules do you know to be affected?
42
+ multiple: true
43
+ options:
44
+ - Documentation/Github
45
+ - libllama (core library)
46
+ - llama-cli
47
+ - llama-server
48
+ - llama-bench
49
+ - llama-quantize
50
+ - Python/Bash scripts
51
+ - Test code
52
+ - Other (Please specify in the next section)
53
+ validations:
54
+ required: false
55
+ - type: textarea
56
+ id: info
57
+ attributes:
58
+ label: Problem description & steps to reproduce
59
+ description: >
60
+ Please give us a summary of the problem and tell us how to reproduce it (if applicable).
61
+ validations:
62
+ required: true
63
+ - type: textarea
64
+ id: first_bad_commit
65
+ attributes:
66
+ label: First Bad Commit
67
+ description: >
68
+ If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
69
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
70
+ validations:
71
+ required: false
72
+ - type: textarea
73
+ id: logs
74
+ attributes:
75
+ label: Relevant log output
76
+ description: >
77
+ If applicable, please copy and paste any relevant log output, including the command that you entered and any generated text.
78
+ This will be automatically formatted into code, so no need for backticks.
79
+ render: shell
80
+ validations:
81
+ required: false
llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml ADDED
@@ -0,0 +1,51 @@
1
+ name: Enhancement
2
+ description: Used to request enhancements for llama.cpp.
3
+ title: "Feature Request: "
4
+ labels: ["enhancement"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas)
10
+
11
+ - type: checkboxes
12
+ id: prerequisites
13
+ attributes:
14
+ label: Prerequisites
15
+ description: Please confirm the following before submitting your enhancement request.
16
+ options:
17
+ - label: I am running the latest code. Mention the version if possible as well.
18
+ required: true
19
+ - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
20
+ required: true
21
+ - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
22
+ required: true
23
+ - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
24
+ required: true
25
+
26
+ - type: textarea
27
+ id: feature-description
28
+ attributes:
29
+ label: Feature Description
30
+ description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
31
+ placeholder: Detailed description of the enhancement
32
+ validations:
33
+ required: true
34
+
35
+ - type: textarea
36
+ id: motivation
37
+ attributes:
38
+ label: Motivation
39
+ description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
40
+ placeholder: Explanation of why this feature is needed and its benefits
41
+ validations:
42
+ required: true
43
+
44
+ - type: textarea
45
+ id: possible-implementation
46
+ attributes:
47
+ label: Possible Implementation
48
+ description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
49
+ placeholder: Detailed description of potential implementation
50
+ validations:
51
+ required: false
llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml ADDED
@@ -0,0 +1,52 @@
1
+ name: Research
2
+ description: Track new technical research area.
3
+ title: "Research: "
4
+ labels: ["research 🔬"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
10
+
11
+ - type: checkboxes
12
+ id: research-stage
13
+ attributes:
14
+ label: Research Stage
15
+ description: Track general state of this research ticket
16
+ options:
17
+ - label: Background Research (Let's try to avoid reinventing the wheel)
18
+ - label: Hypothesis Formed (How do you think this will work and what will its effect be?)
19
+ - label: Strategy / Implementation Forming
20
+ - label: Analysis of results
21
+ - label: Debrief / Documentation (So people in the future can learn from us)
22
+
23
+ - type: textarea
24
+ id: background
25
+ attributes:
26
+ label: Previous existing literature and research
27
+ description: What's the current state of the art and what's the motivation for this research?
28
+
29
+ - type: textarea
30
+ id: hypothesis
31
+ attributes:
32
+ label: Hypothesis
33
+ description: How do you think this will work and what will its effect be?
34
+
35
+ - type: textarea
36
+ id: implementation
37
+ attributes:
38
+ label: Implementation
39
+ description: Got an approach? e.g. a PR ready to go?
40
+
41
+ - type: textarea
42
+ id: analysis
43
+ attributes:
44
+ label: Analysis
45
+ description: How does the proposed implementation behave?
46
+
47
+ - type: textarea
48
+ id: logs
49
+ attributes:
50
+ label: Relevant log output
51
+ description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
52
+ render: shell
llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml ADDED
@@ -0,0 +1,28 @@
1
+ name: Refactor (Maintainers)
2
+ description: Used to track refactoring opportunities.
3
+ title: "Refactor: "
4
+ labels: ["refactor"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
10
+ You may also want to check the [pull request refactor label](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
11
+
12
+ - type: textarea
13
+ id: background-description
14
+ attributes:
15
+ label: Background Description
16
+ description: Please provide a detailed written description of the pain points you are trying to solve.
17
+ placeholder: Detailed description behind your motivation to request refactor
18
+ validations:
19
+ required: true
20
+
21
+ - type: textarea
22
+ id: possible-approaches
23
+ attributes:
24
+ label: Possible Refactor Approaches
25
+ description: If you have an idea of possible approaches to solve this problem, describe them here. You may want to make it a todo list.
26
+ placeholder: Your idea of possible refactoring opportunity/approaches
27
+ validations:
28
+ required: false
llama.cpp/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,11 @@
1
+ blank_issues_enabled: true
2
+ contact_links:
3
+ - name: Got an idea?
4
+ url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
5
+ about: Pop it there. It may then become an enhancement ticket.
6
+ - name: Got a question?
7
+ url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
8
+ about: Ask a question there!
9
+ - name: Want to contribute?
10
+ url: https://github.com/ggerganov/llama.cpp/wiki/contribute
11
+ about: Head to the contribution guide page of the wiki for areas you can help with
llama.cpp/.github/labeler.yml ADDED
@@ -0,0 +1,86 @@
1
+ # https://github.com/actions/labeler
2
+ Kompute:
3
+ - changed-files:
4
+ - any-glob-to-any-file:
5
+ - ggml/include/ggml-kompute.h
6
+ - ggml/src/ggml-kompute/**
7
+ - README-kompute.md
8
+ Apple Metal:
9
+ - changed-files:
10
+ - any-glob-to-any-file:
11
+ - ggml/include/ggml-metal.h
12
+ - ggml/src/ggml-metal/**
13
+ - README-metal.md
14
+ SYCL:
15
+ - changed-files:
16
+ - any-glob-to-any-file:
17
+ - ggml/include/ggml-sycl.h
18
+ - ggml/src/ggml-sycl/**
19
+ - docs/backend/SYCL.md
20
+ - examples/sycl/**
21
+ Nvidia GPU:
22
+ - changed-files:
23
+ - any-glob-to-any-file:
24
+ - ggml/include/ggml-cuda.h
25
+ - ggml/src/ggml-cuda/**
26
+ Vulkan:
27
+ - changed-files:
28
+ - any-glob-to-any-file:
29
+ - ggml/include/ggml-vulkan.h
30
+ - ggml/src/ggml-vulkan/**
31
+ documentation:
32
+ - changed-files:
33
+ - any-glob-to-any-file:
34
+ - docs/**
35
+ - media/**
36
+ testing:
37
+ - changed-files:
38
+ - any-glob-to-any-file:
39
+ - tests/**
40
+ build:
41
+ - changed-files:
42
+ - any-glob-to-any-file:
43
+ - cmake/**
44
+ - CMakeLists.txt
45
+ - CMakePresets.json
46
+ examples:
47
+ - changed-files:
48
+ - any-glob-to-any-file: examples/**
49
+ devops:
50
+ - changed-files:
51
+ - any-glob-to-any-file:
52
+ - .devops/**
53
+ - .github/**
54
+ - ci/**
55
+ python:
56
+ - changed-files:
57
+ - any-glob-to-any-file:
58
+ - "**/*.py"
59
+ - requirements/**
60
+ - gguf-py/**
61
+ - .flake8
62
+ script:
63
+ - changed-files:
64
+ - any-glob-to-any-file:
65
+ - scripts/**
66
+ android:
67
+ - changed-files:
68
+ - any-glob-to-any-file:
69
+ - examples/llama.android/**
70
+ server:
71
+ - changed-files:
72
+ - any-glob-to-any-file:
73
+ - examples/server/**
74
+ ggml:
75
+ - changed-files:
76
+ - any-glob-to-any-file:
77
+ - ggml/**
78
+ nix:
79
+ - changed-files:
80
+ - any-glob-to-any-file:
81
+ - "**/*.nix"
82
+ - .github/workflows/nix-*.yml
83
+ - .devops/nix/nixpkgs-instances.nix
84
+ embedding:
85
+ - changed-files:
86
+ - any-glob-to-any-file: examples/embedding/
llama.cpp/.github/pull_request_template.md ADDED
@@ -0,0 +1 @@
1
+ *Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
llama.cpp/.github/workflows/bench.yml.disabled ADDED
@@ -0,0 +1,315 @@
1
+ # TODO: there have been some issues with the workflow, so disabling for now
2
+ # https://github.com/ggerganov/llama.cpp/issues/7893
3
+ #
4
+ # Benchmark
5
+ name: Benchmark
6
+
7
+ on:
8
+ workflow_dispatch:
9
+ inputs:
10
+ gpu-series:
11
+ description: 'Azure GPU series to run with'
12
+ required: true
13
+ type: choice
14
+ options:
15
+ - Standard_NC4as_T4_v3
16
+ - Standard_NC24ads_A100_v4
17
+ - Standard_NC80adis_H100_v5
18
+ sha:
19
+ description: 'Commit SHA1 to build'
20
+ required: false
21
+ type: string
22
+ duration:
23
+ description: 'Duration of the bench'
24
+ type: string
25
+ default: 10m
26
+
27
+ push:
28
+ branches:
29
+ - master
30
+ paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
31
+ pull_request_target:
32
+ types: [opened, synchronize, reopened]
33
+ paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
34
+ schedule:
35
+ - cron: '04 2 * * *'
36
+
37
+ concurrency:
38
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
39
+ cancel-in-progress: true
40
+
41
+ jobs:
42
+ bench-server-baseline:
43
+ runs-on: Standard_NC4as_T4_v3
44
+ env:
45
+ RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: could not find a way to avoid duplicating it
46
+ N_USERS: 8
47
+ DURATION: 10m
48
+
49
+ strategy:
50
+ matrix:
51
+ model: [phi-2]
52
+ ftype: [q4_0, q8_0, f16]
53
+ include:
54
+ - model: phi-2
55
+ ftype: q4_0
56
+ pr_comment_enabled: "true"
57
+
58
+ if: |
59
+ inputs.gpu-series == 'Standard_NC4as_T4_v3'
60
+ || (
61
+ github.event_name == 'schedule'
62
+ && github.ref_name == 'master'
63
+ && github.repository_owner == 'ggerganov'
64
+ )
65
+ || github.event_name == 'pull_request_target'
66
+ || (
67
+ github.event_name == 'push'
68
+ && github.event.ref == 'refs/heads/master'
69
+ && github.repository_owner == 'ggerganov'
70
+ )
71
+ steps:
72
+ - name: Clone
73
+ id: checkout
74
+ uses: actions/checkout@v4
75
+ with:
76
+ fetch-depth: 0
77
+ ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
78
+
79
+ - name: Install python env
80
+ id: pipenv
81
+ run: |
82
+ cd examples/server/bench
83
+ python3 -m venv venv
84
+ source venv/bin/activate
85
+ pip install -r requirements.txt
86
+
87
+ - name: Prometheus
88
+ id: install_prometheus
89
+ run: |
90
+ wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
91
+ tar xzf prometheus*.tar.gz --strip-components=1
92
+ ./prometheus --config.file=examples/server/bench/prometheus.yml &
93
+ while ! nc -z localhost 9090; do
94
+ sleep 0.1
95
+ done
96
+
97
+ - name: Set up Go
98
+ uses: actions/setup-go@v5
99
+ with:
100
+ go-version: '1.21'
101
+
102
+ - name: Install k6 and xk6-sse
103
+ id: k6_installation
104
+ run: |
105
+ cd examples/server/bench
106
+ go install go.k6.io/xk6/cmd/xk6@latest
107
+ xk6 build master \
108
+ --with github.com/phymbert/xk6-sse
109
+
110
+ - name: Build
111
+ id: cmake_build
112
+ run: |
113
+ set -eux
114
+ cmake -B build \
115
+ -DGGML_NATIVE=OFF \
116
+ -DLLAMA_BUILD_SERVER=ON \
117
+ -DLLAMA_CURL=ON \
118
+ -DLLAMA_CUBLAS=ON \
119
+ -DCUDAToolkit_ROOT=/usr/local/cuda \
120
+ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
121
+ -DCMAKE_CUDA_ARCHITECTURES=75 \
122
+ -DLLAMA_FATAL_WARNINGS=OFF \
123
+ -DLLAMA_ALL_WARNINGS=OFF \
124
+ -DCMAKE_BUILD_TYPE=Release;
125
+ cmake --build build --config Release -j $(nproc) --target llama-server
126
+
127
+ - name: Download the dataset
128
+ id: download_dataset
129
+ run: |
130
+ cd examples/server/bench
131
+ wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
132
+
133
+ - name: Server bench
134
+ id: server_bench
135
+ env:
136
+ HEAD_REF: ${{ github.head_ref || github.ref_name }}
137
+ run: |
138
+ set -eux
139
+
140
+ cd examples/server/bench
141
+ source venv/bin/activate
142
+ python bench.py \
143
+ --runner-label ${{ env.RUNNER_LABEL }} \
144
+ --name ${{ github.job }} \
145
+ --branch $HEAD_REF \
146
+ --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
147
+ --scenario script.js \
148
+ --duration ${{ github.event.inputs.duration || env.DURATION }} \
149
+ --hf-repo ggml-org/models \
150
+ --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
151
+ --model-path-prefix /models \
152
+ --parallel ${{ env.N_USERS }} \
153
+ -ngl 33 \
154
+ --batch-size 2048 \
155
+ --ubatch-size 256 \
156
+ --ctx-size 16384 \
157
+ --n-prompts 1000 \
158
+ --max-prompt-tokens 1024 \
159
+ --max-tokens 2048
160
+
161
+ cat results.github.env >> $GITHUB_ENV
162
+
163
+ # Remove dataset as we do not want it in the artefact
164
+ rm ShareGPT_V3_unfiltered_cleaned_split.json
165
+
166
+ - uses: actions/upload-artifact@v4
167
+ with:
168
+ name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
169
+ compression-level: 9
170
+ path: |
171
+ examples/server/bench/*.jpg
172
+ examples/server/bench/*.json
173
+ examples/server/bench/*.log
174
+
175
+ - name: Commit status
176
+ uses: Sibz/github-status-action@v1
177
+ with:
178
+ authToken: ${{secrets.GITHUB_TOKEN}}
179
+ sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
180
+ context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
181
+ description: |
182
+ ${{ env.BENCH_RESULTS }}
183
+ state: 'success'
184
+
185
+ - name: Upload benchmark images
186
+ uses: devicons/public-upload-to-imgur@v2.2.2
187
+ continue-on-error: true # Important: this upload appears unstable (intermittent HTTP 503 errors)
188
+ id: imgur_step
189
+ with:
190
+ client_id: ${{secrets.IMGUR_CLIENT_ID}}
191
+ path: |
192
+ examples/server/bench/prompt_tokens_seconds.jpg
193
+ examples/server/bench/predicted_tokens_seconds.jpg
194
+ examples/server/bench/kv_cache_usage_ratio.jpg
195
+ examples/server/bench/requests_processing.jpg
196
+
197
+ - name: Extract mermaid
198
+ id: set_mermaid
199
+ run: |
200
+ set -eux
201
+
202
+ cd examples/server/bench
203
+ PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
204
+ echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
205
+ echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
206
+ echo "EOF" >> $GITHUB_ENV
207
+
208
+ PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
209
+ echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
210
+ echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
211
+ echo "EOF" >> $GITHUB_ENV
212
+
213
+ KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
214
+ echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
215
+ echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
216
+ echo "EOF" >> $GITHUB_ENV
217
+
218
+ REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
219
+ echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
220
+ echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
221
+ echo "EOF" >> $GITHUB_ENV
222
+
223
+ - name: Extract image url
224
+ id: extract_image_url
225
+ continue-on-error: true
226
+ run: |
227
+ set -eux
228
+
229
+ echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
230
+ echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
231
+ echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
232
+ echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
233
+
234
+ - name: Comment PR
235
+ uses: mshick/add-pr-comment@v2
236
+ id: comment_pr
237
+ if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
238
+ with:
239
+ message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
240
+ message: |
241
+ <p align="center">
242
+
243
+ 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
244
+
245
+ </p>
246
+
247
+ <details>
248
+
249
+ <summary>Expand details for performance related PR only</summary>
250
+
251
+ - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
252
+ - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
253
+ - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
254
+ - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
255
+ - ${{ env.BENCH_GRAPH_XLABEL }}
256
+
257
+
258
+ <p align="center">
259
+
260
+ <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
261
+
262
+ <details>
263
+
264
+ <summary>More</summary>
265
+
266
+ ```mermaid
267
+ ${{ env.PROMPT_TOKENS_SECONDS }}
268
+ ```
269
+
270
+ </details>
271
+
272
+ <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
273
+
274
+ <details>
275
+ <summary>More</summary>
276
+
277
+ ```mermaid
278
+ ${{ env.PREDICTED_TOKENS_SECONDS }}
279
+ ```
280
+
281
+ </details>
282
+
283
+ </p>
284
+
285
+ <details>
286
+
287
+ <summary>Details</summary>
288
+
289
+ <p align="center">
290
+
291
+ <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
292
+
293
+ <details>
294
+ <summary>More</summary>
295
+
296
+ ```mermaid
297
+ ${{ env.KV_CACHE_USAGE_RATIO }}
298
+ ```
299
+
300
+ </details>
301
+
302
+ <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
303
+
304
+ <details>
305
+ <summary>More</summary>
306
+
307
+ ```mermaid
308
+ ${{ env.REQUESTS_PROCESSING }}
309
+ ```
310
+
311
+ </details>
312
+
313
+ </p>
314
+ </details>
315
+ </details>
llama.cpp/.github/workflows/build.yml ADDED
@@ -0,0 +1,1416 @@
1
+ name: CI
2
+
3
+ on:
4
+ workflow_dispatch: # allows manual triggering
5
+ inputs:
6
+ create_release:
7
+ description: 'Create new release'
8
+ required: true
9
+ type: boolean
10
+ push:
11
+ branches:
12
+ - master
13
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
14
+ pull_request:
15
+ types: [opened, synchronize, reopened]
16
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
17
+
18
+ concurrency:
19
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
20
+ cancel-in-progress: true
21
+
22
+ # Fine-grained permissions
23
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
24
+ permissions:
25
+ contents: write # for creating release
26
+
27
+ env:
28
+ BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
29
+ GGML_NLOOP: 3
30
+ GGML_N_THREADS: 1
31
+ LLAMA_LOG_COLORS: 1
32
+ LLAMA_LOG_PREFIX: 1
33
+ LLAMA_LOG_TIMESTAMPS: 1
34
+
35
+ jobs:
36
+ macOS-latest-cmake-arm64:
37
+ runs-on: macos-14
38
+
39
+ steps:
40
+ - name: Clone
41
+ id: checkout
42
+ uses: actions/checkout@v4
43
+ with:
44
+ fetch-depth: 0
45
+
46
+ - name: Dependencies
47
+ id: depends
48
+ continue-on-error: true
49
+ run: |
50
+ brew update
51
+
52
+ - name: Build
53
+ id: cmake_build
54
+ run: |
55
+ sysctl -a
56
+ mkdir build
57
+ cd build
58
+ cmake .. \
59
+ -DLLAMA_FATAL_WARNINGS=ON \
60
+ -DLLAMA_CURL=ON \
61
+ -DGGML_METAL_USE_BF16=ON \
62
+ -DGGML_METAL_EMBED_LIBRARY=ON \
63
+ -DGGML_RPC=ON \
64
+ -DBUILD_SHARED_LIBS=OFF
65
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
66
+
67
+ - name: Test
68
+ id: cmake_test
69
+ run: |
70
+ cd build
71
+ ctest -L 'main|curl' --verbose --timeout 900
72
+
73
+ - name: Determine tag name
74
+ id: tag
75
+ shell: bash
76
+ run: |
77
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
78
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
79
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
80
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
81
+ else
82
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
83
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
84
+ fi
85
+
86
+ - name: Pack artifacts
87
+ id: pack_artifacts
88
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
89
+ run: |
90
+ cp LICENSE ./build/bin/
91
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
92
+
93
+ - name: Upload artifacts
94
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
95
+ uses: actions/upload-artifact@v4
96
+ with:
97
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
98
+ name: llama-bin-macos-arm64.zip
99
+
100
+ macOS-latest-cmake-x64:
101
+ runs-on: macos-13
102
+
103
+ steps:
104
+ - name: Clone
105
+ id: checkout
106
+ uses: actions/checkout@v4
107
+ with:
108
+ fetch-depth: 0
109
+
110
+ - name: Dependencies
111
+ id: depends
112
+ continue-on-error: true
113
+ run: |
114
+ brew update
115
+
116
+ - name: Build
117
+ id: cmake_build
118
+ run: |
119
+ sysctl -a
120
+ # Metal is disabled due to intermittent failures with Github runners not having a GPU:
121
+ # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
122
+ cmake -B build \
123
+ -DLLAMA_FATAL_WARNINGS=ON \
124
+ -DLLAMA_CURL=ON \
125
+ -DGGML_METAL=OFF \
126
+ -DGGML_RPC=ON \
127
+ -DBUILD_SHARED_LIBS=OFF
128
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
129
+
130
+ - name: Test
131
+ id: cmake_test
132
+ run: |
133
+ cd build
134
+ ctest -L main --verbose --timeout 900
135
+
136
+ - name: Determine tag name
137
+ id: tag
138
+ shell: bash
139
+ run: |
140
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
141
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
142
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
143
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
144
+ else
145
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
146
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
147
+ fi
148
+
149
+ - name: Pack artifacts
150
+ id: pack_artifacts
151
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
152
+ run: |
153
+ cp LICENSE ./build/bin/
154
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
155
+
156
+ - name: Upload artifacts
157
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
158
+ uses: actions/upload-artifact@v4
159
+ with:
160
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
161
+ name: llama-bin-macos-x64.zip
162
+
163
+ ubuntu-latest-cmake:
164
+ runs-on: ubuntu-latest
165
+
166
+ steps:
167
+ - name: Clone
168
+ id: checkout
169
+ uses: actions/checkout@v4
170
+ with:
171
+ fetch-depth: 0
172
+
173
+ - name: Dependencies
174
+ id: depends
175
+ run: |
176
+ sudo apt-get update
177
+ sudo apt-get install build-essential libcurl4-openssl-dev
178
+
179
+ - name: Build
180
+ id: cmake_build
181
+ run: |
182
+ mkdir build
183
+ cd build
184
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
185
+ cmake --build . --config Release -j $(nproc)
186
+
187
+ - name: Test
188
+ id: cmake_test
189
+ run: |
190
+ cd build
191
+ ctest -L 'main|curl' --verbose --timeout 900
192
+
193
+ - name: Test llama2c conversion
194
+ id: llama2c_test
195
+ run: |
196
+ cd build
197
+ echo "Fetch tokenizer"
198
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
199
+ echo "Fetch llama2c model"
200
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
201
+ ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
202
+ ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
203
+
204
+ - name: Determine tag name
205
+ id: tag
206
+ shell: bash
207
+ run: |
208
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
209
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
210
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
211
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
212
+ else
213
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
214
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
215
+ fi
216
+
217
+ - name: Pack artifacts
218
+ id: pack_artifacts
219
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
220
+ run: |
221
+ cp LICENSE ./build/bin/
222
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
223
+
224
+ - name: Upload artifacts
225
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
226
+ uses: actions/upload-artifact@v4
227
+ with:
228
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
229
+ name: llama-bin-ubuntu-x64.zip
230
+
231
+ ubuntu-latest-cmake-sanitizer:
232
+ runs-on: ubuntu-latest
233
+
234
+ continue-on-error: true
235
+
236
+ strategy:
237
+ matrix:
238
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
239
+ build_type: [Debug, Release]
240
+
241
+ steps:
242
+ - name: Clone
243
+ id: checkout
244
+ uses: actions/checkout@v4
245
+
246
+ - name: Dependencies
247
+ id: depends
248
+ run: |
249
+ sudo apt-get update
250
+ sudo apt-get install build-essential
251
+
252
+ - name: Build
253
+ id: cmake_build
254
+ if: ${{ matrix.sanitizer != 'THREAD' }}
255
+ run: |
256
+ mkdir build
257
+ cd build
258
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
259
+ cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
260
+
261
+ - name: Build (no OpenMP)
262
+ id: cmake_build_no_openmp
263
+ if: ${{ matrix.sanitizer == 'THREAD' }}
264
+ run: |
265
+ mkdir build
266
+ cd build
267
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_OPENMP=OFF
268
+ cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
269
+
270
+ - name: Test
271
+ id: cmake_test
272
+ run: |
273
+ cd build
274
+ ctest -L main --verbose --timeout 900
275
+
276
+ ubuntu-latest-cmake-rpc:
277
+ runs-on: ubuntu-latest
278
+
279
+ continue-on-error: true
280
+
281
+ steps:
282
+ - name: Clone
283
+ id: checkout
284
+ uses: actions/checkout@v4
285
+
286
+ - name: Dependencies
287
+ id: depends
288
+ run: |
289
+ sudo apt-get update
290
+ sudo apt-get install build-essential
291
+
292
+ - name: Build
293
+ id: cmake_build
294
+ run: |
295
+ mkdir build
296
+ cd build
297
+ cmake -DGGML_RPC=ON ..
298
+ cmake --build . --config Release -j $(nproc)
299
+
300
+ - name: Test
301
+ id: cmake_test
302
+ run: |
303
+ cd build
304
+ ctest -L main --verbose
305
+
306
+ ubuntu-22-cmake-vulkan:
307
+ runs-on: ubuntu-22.04
308
+
309
+ steps:
310
+ - name: Clone
311
+ id: checkout
312
+ uses: actions/checkout@v4
313
+
314
+ - name: Dependencies
315
+ id: depends
316
+ run: |
317
+ wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
318
+ sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
319
+ sudo apt-get update -y
320
+ sudo apt-get install -y build-essential vulkan-sdk
321
+
322
+ - name: Build
323
+ id: cmake_build
324
+ run: |
325
+ mkdir build
326
+ cd build
327
+ cmake -DGGML_VULKAN=ON ..
328
+ cmake --build . --config Release -j $(nproc)
329
+
330
+ ubuntu-22-cmake-hip:
331
+ runs-on: ubuntu-22.04
332
+ container: rocm/dev-ubuntu-22.04:6.0.2
333
+
334
+ steps:
335
+ - name: Clone
336
+ id: checkout
337
+ uses: actions/checkout@v4
338
+
339
+ - name: Dependencies
340
+ id: depends
341
+ run: |
342
+ sudo apt-get update
343
+ sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
344
+
345
+ - name: Build with native CMake HIP support
346
+ id: cmake_build
347
+ run: |
348
+ cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
349
+ cmake --build build --config Release -j $(nproc)
350
+
351
+ - name: Build with legacy HIP support
352
+ id: cmake_build_legacy_hip
353
+ run: |
354
+ cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
355
+ cmake --build build2 --config Release -j $(nproc)
356
+
357
+ ubuntu-22-cmake-musa:
358
+ runs-on: ubuntu-22.04
359
+ container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
360
+
361
+ steps:
362
+ - name: Clone
363
+ id: checkout
364
+ uses: actions/checkout@v4
365
+
366
+ - name: Dependencies
367
+ id: depends
368
+ run: |
369
+ apt-get update
370
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
371
+
372
+ - name: Build with native CMake MUSA support
373
+ id: cmake_build
374
+ run: |
375
+ cmake -B build -S . -DGGML_MUSA=ON
376
+ cmake --build build --config Release -j $(nproc)
377
+
378
+ ubuntu-22-cmake-sycl:
379
+ runs-on: ubuntu-22.04
380
+
381
+ continue-on-error: true
382
+
383
+ steps:
384
+ - uses: actions/checkout@v4
385
+
386
+ - name: add oneAPI to apt
387
+ shell: bash
388
+ run: |
389
+ cd /tmp
390
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
391
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
392
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
393
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
394
+
395
+ - name: install oneAPI dpcpp compiler
396
+ shell: bash
397
+ run: |
398
+ sudo apt update
399
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
400
+
401
+ - name: install oneAPI MKL library
402
+ shell: bash
403
+ run: |
404
+ sudo apt install intel-oneapi-mkl-devel
405
+
406
+ - name: Clone
407
+ id: checkout
408
+ uses: actions/checkout@v4
409
+
410
+ - name: Build
411
+ id: cmake_build
412
+ run: |
413
+ source /opt/intel/oneapi/setvars.sh
414
+ mkdir build
415
+ cd build
416
+ cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
417
+ cmake --build . --config Release -j $(nproc)
418
+
419
+ ubuntu-22-cmake-sycl-fp16:
420
+ runs-on: ubuntu-22.04
421
+
422
+ continue-on-error: true
423
+
424
+ steps:
425
+ - uses: actions/checkout@v4
426
+
427
+ - name: add oneAPI to apt
428
+ shell: bash
429
+ run: |
430
+ cd /tmp
431
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
432
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
433
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
434
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
435
+
436
+ - name: install oneAPI dpcpp compiler
437
+ shell: bash
438
+ run: |
439
+ sudo apt update
440
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
441
+
442
+ - name: install oneAPI MKL library
443
+ shell: bash
444
+ run: |
445
+ sudo apt install intel-oneapi-mkl-devel
446
+
447
+ - name: Clone
448
+ id: checkout
449
+ uses: actions/checkout@v4
450
+
451
+ - name: Build
452
+ id: cmake_build
453
+ run: |
454
+ source /opt/intel/oneapi/setvars.sh
455
+ mkdir build
456
+ cd build
457
+ cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
458
+ cmake --build . --config Release -j $(nproc)
459
+
460
+ # TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
461
+ # how to debug it.
462
+ # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
463
+ # would be great if we fix these
464
+ macOS-latest-cmake:
465
+ runs-on: macos-latest
466
+
467
+ steps:
468
+ - name: Clone
469
+ id: checkout
470
+ uses: actions/checkout@v4
471
+
472
+ - name: Dependencies
473
+ id: depends
474
+ continue-on-error: true
475
+ run: |
476
+ brew update
477
+
478
+ - name: Build
479
+ id: cmake_build
480
+ run: |
481
+ sysctl -a
482
+ mkdir build
483
+ cd build
484
+ cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF ..
485
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
486
+
487
+ - name: Test
488
+ id: cmake_test
489
+ run: |
490
+ cd build
491
+ ctest -L main --verbose --timeout 900
492
+
493
+ macOS-latest-cmake-ios:
494
+ runs-on: macos-latest
495
+
496
+ steps:
497
+ - name: Clone
498
+ id: checkout
499
+ uses: actions/checkout@v4
500
+
501
+ - name: Dependencies
502
+ id: depends
503
+ continue-on-error: true
504
+ run: |
505
+ brew update
506
+
507
+ - name: Build
508
+ id: cmake_build
509
+ run: |
510
+ sysctl -a
511
+ mkdir build
512
+ cd build
513
+ cmake -G Xcode .. \
514
+ -DGGML_METAL_USE_BF16=ON \
515
+ -DGGML_METAL_EMBED_LIBRARY=ON \
516
+ -DLLAMA_BUILD_EXAMPLES=OFF \
517
+ -DLLAMA_BUILD_TESTS=OFF \
518
+ -DLLAMA_BUILD_SERVER=OFF \
519
+ -DCMAKE_SYSTEM_NAME=iOS \
520
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
521
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
522
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
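+ # CODE_SIGNING_ALLOWED=NO skips code signing so the Xcode build can complete on CI runners without a signing identity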
523
+
524
+ macOS-latest-cmake-tvos:
525
+ runs-on: macos-latest
526
+
527
+ steps:
528
+ - name: Clone
529
+ id: checkout
530
+ uses: actions/checkout@v4
531
+
532
+ - name: Dependencies
533
+ id: depends
534
+ continue-on-error: true
535
+ run: |
536
+ brew update
537
+
538
+ - name: Build
539
+ id: cmake_build
540
+ run: |
541
+ sysctl -a
542
+ mkdir build
543
+ cd build
544
+ cmake -G Xcode .. \
545
+ -DGGML_METAL_USE_BF16=ON \
546
+ -DGGML_METAL_EMBED_LIBRARY=ON \
547
+ -DLLAMA_BUILD_EXAMPLES=OFF \
548
+ -DLLAMA_BUILD_TESTS=OFF \
549
+ -DLLAMA_BUILD_SERVER=OFF \
550
+ -DCMAKE_SYSTEM_NAME=tvOS \
551
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
552
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
553
+ cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
554
+
555
+ # TODO: tmp disabled. see for possible re-enable:
556
+ # https://github.com/ggerganov/llama.cpp/pull/10525
557
+ # macOS-latest-swift:
558
+ # runs-on: macos-latest
559
+ #
560
+ # strategy:
561
+ # matrix:
562
+ # destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
563
+ #
564
+ # steps:
565
+ # - name: Clone
566
+ # id: checkout
567
+ # uses: actions/checkout@v4
568
+ #
569
+ # - name: Dependencies
570
+ # id: depends
571
+ # continue-on-error: true
572
+ # run: |
573
+ # brew update
574
+ #
575
+ # - name: xcodebuild for swift package
576
+ # id: xcodebuild
577
+ # run: |
578
+ # xcodebuild -scheme llama -destination "${{ matrix.destination }}"
579
+ #
580
+ # - name: Build Swift Example
581
+ # id: make_build_swift_example
582
+ # run: |
583
+ # make swift
584
+
585
+ windows-msys2:
586
+ runs-on: windows-latest
587
+
588
+ strategy:
589
+ fail-fast: false
590
+ matrix:
591
+ include:
592
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
593
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
594
+
595
+ steps:
596
+ - name: Clone
597
+ uses: actions/checkout@v4
598
+
599
+ - name: Setup ${{ matrix.sys }}
600
+ uses: msys2/setup-msys2@v2
601
+ with:
602
+ update: true
603
+ msystem: ${{matrix.sys}}
604
+ install: >-
605
+ base-devel
606
+ mingw-w64-${{matrix.env}}-toolchain
607
+ mingw-w64-${{matrix.env}}-cmake
608
+ mingw-w64-${{matrix.env}}-openblas
609
+
610
+ - name: Build using CMake
611
+ shell: msys2 {0}
612
+ run: |
613
+ cmake -B build
614
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
615
+
616
+ - name: Clean after building using CMake
617
+ shell: msys2 {0}
618
+ run: |
619
+ rm -rf build
620
+
621
+ - name: Build using CMake w/ OpenBLAS
622
+ shell: msys2 {0}
623
+ run: |
624
+ cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
625
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
626
+
627
+ windows-latest-cmake:
628
+ runs-on: windows-latest
629
+
630
+ env:
631
+ OPENBLAS_VERSION: 0.3.23
632
+ SDE_VERSION: 9.33.0-2024-01-07
633
+ VULKAN_VERSION: 1.3.261.1
634
+
635
+ strategy:
636
+ matrix:
637
+ include:
638
+ - build: 'noavx-x64'
639
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
640
+ - build: 'avx2-x64'
641
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
642
+ - build: 'avx-x64'
643
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
644
+ - build: 'avx512-x64'
645
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
646
+ - build: 'openblas-x64'
647
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
648
+ - build: 'kompute-x64'
649
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
650
+ - build: 'vulkan-x64'
651
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
652
+ - build: 'llvm-arm64'
653
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
654
+ - build: 'msvc-arm64'
655
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
656
+
657
+ steps:
658
+ - name: Clone
659
+ id: checkout
660
+ uses: actions/checkout@v4
661
+ with:
662
+ fetch-depth: 0
663
+
664
+ - name: Clone Kompute submodule
665
+ id: clone_kompute
666
+ if: ${{ matrix.build == 'kompute-x64' }}
667
+ run: |
668
+ git submodule update --init ggml/src/ggml-kompute/kompute
669
+
670
+ - name: Download OpenBLAS
671
+ id: get_openblas
672
+ if: ${{ matrix.build == 'openblas-x64' }}
673
+ run: |
674
+ curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
675
+ curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
676
+ mkdir $env:RUNNER_TEMP/openblas
677
+ tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
678
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
679
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
680
+ $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
681
+ & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
682
+
683
+ - name: Install Vulkan SDK
684
+ id: get_vulkan
685
+ if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
686
+ run: |
687
+ curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
688
+ & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
689
+ Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
690
+ Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
691
+
692
+ - name: Install Ninja
693
+ id: install_ninja
694
+ run: |
695
+ choco install ninja
696
+
697
+ - name: Build
698
+ id: cmake_build
699
+ run: |
700
+ cmake -S . -B build ${{ matrix.defines }}
701
+ cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
702
+
703
+ - name: Add libopenblas.dll
704
+ id: add_libopenblas_dll
705
+ if: ${{ matrix.build == 'openblas-x64' }}
706
+ run: |
707
+ cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
708
+ cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
709
+
710
+ - name: Check AVX512F support
711
+ id: check_avx512f
712
+ if: ${{ matrix.build == 'avx512-x64' }}
713
+ continue-on-error: true
714
+ run: |
715
+ cd build
716
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
717
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
718
+ $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
719
+ echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
720
+ & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
721
+ .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
722
+
723
+ - name: Test
724
+ id: cmake_test
725
+ # not all machines have native AVX-512
726
+ if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
727
+ run: |
728
+ cd build
729
+ ctest -L main -C Release --verbose --timeout 900
730
+
731
+ - name: Test (Intel SDE)
732
+ id: cmake_test_sde
733
+ if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
734
+ run: |
735
+ curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
736
+ # the Windows tar can't extract the SDE tar.xz archive, so 7z is used instead
737
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
738
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
739
+ $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
740
+ cd build
741
+ $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
742
+ & $sde -future -- ctest -L main -C Release --verbose --timeout 900
743
+
744
+ - name: Determine tag name
745
+ id: tag
746
+ shell: bash
747
+ run: |
748
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
749
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
750
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
751
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
752
+ else
753
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
754
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
755
+ fi
756
+
757
+ - name: Pack artifacts
758
+ id: pack_artifacts
759
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
760
+ run: |
761
+ Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
762
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
763
+
764
+ - name: Upload artifacts
765
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
766
+ uses: actions/upload-artifact@v4
767
+ with:
768
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
769
+ name: llama-bin-win-${{ matrix.build }}.zip
770
+
771
+ ubuntu-latest-cmake-cuda:
772
+ runs-on: ubuntu-latest
773
+ container: nvidia/cuda:12.6.2-devel-ubuntu24.04
774
+
775
+ steps:
776
+ - name: Clone
777
+ id: checkout
778
+ uses: actions/checkout@v4
779
+
780
+ - name: Install dependencies
781
+ env:
782
+ DEBIAN_FRONTEND: noninteractive
783
+ run: |
784
+ apt update
785
+ apt install -y cmake build-essential ninja-build libgomp1 git
786
+
787
+ - name: Build with CMake
788
+ run: |
789
+ cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89-real -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined -DLLAMA_FATAL_WARNINGS=ON
790
+ cmake --build build
791
+
792
+ windows-2019-cmake-cuda:
793
+ runs-on: windows-2019
794
+
795
+ strategy:
796
+ matrix:
797
+ cuda: ['12.4', '11.7']
798
+ build: ['cuda']
799
+
800
+ steps:
801
+ - name: Clone
802
+ id: checkout
803
+ uses: actions/checkout@v4
804
+ with:
805
+ fetch-depth: 0
806
+
807
+ - name: Install CUDA Toolkit 11.7
808
+ if: ${{ matrix.cuda == '11.7' }}
809
+ run: |
810
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
811
+ choco install unzip -y
812
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
813
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
814
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
815
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
816
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
817
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
818
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
819
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
820
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
821
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
822
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
823
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
824
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
825
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
826
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
827
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
828
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
829
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
830
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
831
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
832
+ echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
833
+
834
+ - name: Install CUDA Toolkit 12.4
835
+ if: ${{ matrix.cuda == '12.4' }}
836
+ run: |
837
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
838
+ choco install unzip -y
839
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
840
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
841
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
842
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
843
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
844
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
845
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
846
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
847
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
848
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
849
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
850
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
851
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
852
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
853
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
854
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
855
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
856
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
857
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
858
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
859
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
860
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
861
+ echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
862
+
863
+ - name: Install ccache
864
+ uses: hendrikmuhs/ccache-action@v1.2
865
+ with:
866
+ key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
867
+
868
+ - name: Install Ninja
869
+ id: install_ninja
870
+ run: |
871
+ choco install ninja
872
+
873
+ - name: Build
874
+ id: cmake_build
875
+ shell: cmd
876
+ run: |
877
+ call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
878
+ cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
879
+ set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
880
+ cmake --build build --config Release -j %NINJA_JOBS% -t ggml
881
+ cmake --build build --config Release
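+ REM the ggml target is built first with one job fewer than the processor count, presumably to keep the parallel CUDA compilation within the runner's memory limits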
882
+
883
+ - name: Determine tag name
884
+ id: tag
885
+ shell: bash
886
+ run: |
887
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
888
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
889
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
890
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
891
+ else
892
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
893
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
894
+ fi
895
+
896
+ - name: Pack artifacts
897
+ id: pack_artifacts
898
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
899
+ run: |
900
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
901
+
902
+ - name: Upload artifacts
903
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
904
+ uses: actions/upload-artifact@v4
905
+ with:
906
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
907
+ name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
908
+
909
+ - name: Copy and pack CUDA runtime
910
+ if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
911
+ run: |
912
+ echo "Cuda install location: ${{ env.CUDA_PATH }}"
913
+ $dst='.\build\bin\cudart\'
914
+ robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
915
+ robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
916
+ 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
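+ # only the cudart and cuBLAS runtime DLLs are packed here, presumably so the prebuilt binaries can run without a full CUDA toolkit installation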
917
+
918
+ - name: Upload CUDA runtime
919
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
920
+ uses: actions/upload-artifact@v4
921
+ with:
922
+ path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
923
+ name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
924
+
925
+ windows-latest-cmake-sycl:
926
+ runs-on: windows-latest
927
+
928
+ defaults:
929
+ run:
930
+ shell: bash
931
+
932
+ env:
933
+ WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
934
+ WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
935
+ ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
936
+ steps:
937
+ - name: Clone
938
+ id: checkout
939
+ uses: actions/checkout@v4
940
+ with:
941
+ fetch-depth: 0
942
+
943
+ - name: Install
944
+ run: |
945
+ scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
946
+
947
+ - name: Build
948
+ id: cmake_build
949
+ run: examples/sycl/win-build-sycl.bat
950
+
951
+ - name: Determine tag name
952
+ id: tag
953
+ shell: bash
954
+ run: |
955
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
956
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
957
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
958
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
959
+ else
960
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
961
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
962
+ fi
963
+
964
+ - name: Build the release package
965
+ id: pack_artifacts
966
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
967
+ run: |
968
+ echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
969
+
970
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
971
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
972
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
973
+
974
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
975
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
976
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
977
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
978
+
979
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
980
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
981
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
982
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
983
+
984
+ cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
985
+ cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
986
+
987
+ echo "cp oneAPI running time dll files to ./build/bin done"
988
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
989
+
990
+ - name: Upload the release package
991
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
992
+ uses: actions/upload-artifact@v4
993
+ with:
994
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
995
+ name: llama-bin-win-sycl-x64.zip
996
+
997
+ windows-latest-cmake-hip:
998
+ if: ${{ github.event.inputs.create_release != 'true' }}
999
+ runs-on: windows-latest
1000
+
1001
+ steps:
1002
+ - name: Clone
1003
+ id: checkout
1004
+ uses: actions/checkout@v4
1005
+
1006
+ - name: Install
1007
+ id: depends
1008
+ run: |
1009
+ $ErrorActionPreference = "Stop"
1010
+ write-host "Downloading AMD HIP SDK Installer"
1011
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1012
+ write-host "Installing AMD HIP SDK"
1013
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
1014
+ write-host "Completed AMD HIP SDK installation"
1015
+
1016
+ - name: Verify ROCm
1017
+ id: verify
1018
+ run: |
1019
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
1020
+
1021
+ - name: Install ccache
1022
+ uses: hendrikmuhs/ccache-action@v1.2
1023
+ with:
1024
+ key: ${{ github.job }}
1025
+
1026
+ - name: Build
1027
+ id: cmake_build
1028
+ run: |
1029
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
1030
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1031
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
1032
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1033
+
1034
+ windows-latest-cmake-hip-release:
1035
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1036
+ runs-on: windows-latest
1037
+
1038
+ strategy:
1039
+ matrix:
1040
+ gpu_target: [gfx1100, gfx1101, gfx1030]
1041
+
1042
+ steps:
1043
+ - name: Clone
1044
+ id: checkout
1045
+ uses: actions/checkout@v4
1046
+ with:
1047
+ fetch-depth: 0
1048
+
1049
+ - name: Install
1050
+ id: depends
1051
+ run: |
1052
+ $ErrorActionPreference = "Stop"
1053
+ write-host "Downloading AMD HIP SDK Installer"
1054
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1055
+ write-host "Installing AMD HIP SDK"
1056
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
1057
+ write-host "Completed AMD HIP SDK installation"
1058
+
1059
+ - name: Verify ROCm
1060
+ id: verify
1061
+ run: |
1062
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
1063
+
1064
+ - name: Build
1065
+ id: cmake_build
1066
+ run: |
1067
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
1068
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1069
+ cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
1070
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1071
+ md "build\bin\rocblas\library\"
1072
+ cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
1073
+ cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
1074
+ cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
1075
+
1076
+ - name: Determine tag name
1077
+ id: tag
1078
+ shell: bash
1079
+ run: |
1080
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1081
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1082
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1083
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1084
+ else
1085
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1086
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1087
+ fi
1088
+
1089
+ - name: Pack artifacts
1090
+ id: pack_artifacts
1091
+ run: |
1092
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
1093
+
1094
+ - name: Upload artifacts
1095
+ uses: actions/upload-artifact@v4
1096
+ with:
1097
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
1098
+ name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
1099
+
1100
+ ios-xcode-build:
1101
+ runs-on: macos-latest
1102
+
1103
+ steps:
1104
+ - name: Checkout code
1105
+ uses: actions/checkout@v4
1106
+
1107
+ - name: Build Xcode project
1108
+ run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
1109
+
1110
+ android-build:
1111
+ runs-on: ubuntu-latest
1112
+
1113
+ steps:
1114
+ - name: Clone
1115
+ uses: actions/checkout@v4
1116
+
1117
+ - name: Set up JDK
1118
+ uses: actions/setup-java@v3
1119
+ with:
1120
+ java-version: 17
1121
+ distribution: zulu
1122
+
1123
+ - name: Setup Android SDK
1124
+ uses: android-actions/setup-android@v3
1125
+ with:
1126
+ log-accepted-android-sdk-licenses: false
1127
+
1128
+ - name: Build
1129
+ run: |
1130
+ cd examples/llama.android
1131
+
1132
+ ./gradlew build --no-daemon
1133
+
1134
+ # freeBSD-latest:
1135
+ # runs-on: macos-12
1136
+ # steps:
1137
+ # - name: Clone
1138
+ # uses: actions/checkout@v4
1139
+ #
1140
+ # - name: Build
1141
+ # uses: cross-platform-actions/action@v0.19.0
1142
+ # with:
1143
+ # operating_system: freebsd
1144
+ # version: '13.2'
1145
+ # hypervisor: 'qemu'
1146
+ # run: |
1147
+ # sudo pkg update
1148
+ # sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
1149
+ # gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
1150
+
1151
+ release:
1152
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1153
+
1154
+ runs-on: ubuntu-latest
1155
+
1156
+ needs:
1157
+ - ubuntu-latest-cmake
1158
+ - macOS-latest-cmake
1159
+ - windows-latest-cmake
1160
+ - windows-2019-cmake-cuda
1161
+ - windows-latest-cmake-hip-release
1162
+ - macOS-latest-cmake-arm64
1163
+ - macOS-latest-cmake-x64
1164
+
1165
+ steps:
1166
+ - name: Clone
1167
+ id: checkout
1168
+ uses: actions/checkout@v4
1169
+ with:
1170
+ fetch-depth: 0
1171
+
1172
+ - name: Determine tag name
1173
+ id: tag
1174
+ shell: bash
1175
+ run: |
1176
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1177
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1178
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1179
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1180
+ else
1181
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1182
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1183
+ fi
1184
+
1185
+ - name: Download artifacts
1186
+ id: download-artifact
1187
+ uses: actions/download-artifact@v4
1188
+ with:
1189
+ path: ./artifact
1190
+
1191
+ - name: Move artifacts
1192
+ id: move_artifacts
1193
+ run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
1194
+
1195
+ - name: Create release
1196
+ id: create_release
1197
+ uses: anzz1/action-create-release@v1
1198
+ env:
1199
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1200
+ with:
1201
+ tag_name: ${{ steps.tag.outputs.name }}
1202
+
1203
+ - name: Upload release
1204
+ id: upload_release
1205
+ uses: actions/github-script@v3
1206
+ with:
1207
+ github-token: ${{ secrets.GITHUB_TOKEN }}
1208
+ script: |
1209
+ const path = require('path');
1210
+ const fs = require('fs');
1211
+ const release_id = '${{ steps.create_release.outputs.id }}';
1212
+ for (let file of await fs.readdirSync('./artifact/release')) {
1213
+ if (path.extname(file) === '.zip') {
1214
+ console.log('uploadReleaseAsset', file);
1215
+ await github.repos.uploadReleaseAsset({
1216
+ owner: context.repo.owner,
1217
+ repo: context.repo.repo,
1218
+ release_id: release_id,
1219
+ name: file,
1220
+ data: await fs.readFileSync(`./artifact/release/${file}`)
1221
+ });
1222
+ }
1223
+ }
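+ // the loop above attaches every .zip collected from the build jobs to the release created in the previous step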
1224
+
1225
+ # ubuntu-latest-gcc:
1226
+ # runs-on: ubuntu-latest
1227
+ #
1228
+ # strategy:
1229
+ # matrix:
1230
+ # build: [Debug, Release]
1231
+ #
1232
+ # steps:
1233
+ # - name: Clone
1234
+ # uses: actions/checkout@v4
1235
+ #
1236
+ # - name: Dependencies
1237
+ # run: |
1238
+ # sudo apt-get update
1239
+ # sudo apt-get install build-essential
1240
+ # sudo apt-get install cmake
1241
+ #
1242
+ # - name: Configure
1243
+ # run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1244
+ #
1245
+ # - name: Build
1246
+ # run: |
1247
+ # make
1248
+ #
1249
+ # ubuntu-latest-clang:
1250
+ # runs-on: ubuntu-latest
1251
+ #
1252
+ # strategy:
1253
+ # matrix:
1254
+ # build: [Debug, Release]
1255
+ #
1256
+ # steps:
1257
+ # - name: Clone
1258
+ # uses: actions/checkout@v4
1259
+ #
1260
+ # - name: Dependencies
1261
+ # run: |
1262
+ # sudo apt-get update
1263
+ # sudo apt-get install build-essential
1264
+ # sudo apt-get install cmake
1265
+ #
1266
+ # - name: Configure
1267
+ # run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
1268
+ #
1269
+ # - name: Build
1270
+ # run: |
1271
+ # make
1272
+ #
1273
+ # ubuntu-latest-gcc-sanitized:
1274
+ # runs-on: ubuntu-latest
1275
+ #
1276
+ # strategy:
1277
+ # matrix:
1278
+ # sanitizer: [ADDRESS, THREAD, UNDEFINED]
1279
+ #
1280
+ # steps:
1281
+ # - name: Clone
1282
+ # uses: actions/checkout@v4
1283
+ #
1284
+ # - name: Dependencies
1285
+ # run: |
1286
+ # sudo apt-get update
1287
+ # sudo apt-get install build-essential
1288
+ # sudo apt-get install cmake
1289
+ #
1290
+ # - name: Configure
1291
+ # run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
1292
+ #
1293
+ # - name: Build
1294
+ # run: |
1295
+ # make
1296
+ #
1297
+ # windows:
1298
+ # runs-on: windows-latest
1299
+ #
1300
+ # strategy:
1301
+ # matrix:
1302
+ # build: [Release]
1303
+ # arch: [Win32, x64]
1304
+ # include:
1305
+ # - arch: Win32
1306
+ # s2arc: x86
1307
+ # - arch: x64
1308
+ # s2arc: x64
1309
+ #
1310
+ # steps:
1311
+ # - name: Clone
1312
+ # uses: actions/checkout@v4
1313
+ #
1314
+ # - name: Add msbuild to PATH
1315
+ # uses: microsoft/setup-msbuild@v1
1316
+ #
1317
+ # - name: Configure
1318
+ # run: >
1319
+ # cmake -S . -B ./build -A ${{ matrix.arch }}
1320
+ # -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1321
+ #
1322
+ # - name: Build
1323
+ # run: |
1324
+ # cd ./build
1325
+ # msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
1326
+ #
1327
+ # - name: Upload binaries
1328
+ # uses: actions/upload-artifact@v4
1329
+ # with:
1330
+ # name: llama-bin-${{ matrix.arch }}
1331
+ # path: build/bin/${{ matrix.build }}
1332
+ #
1333
+ # windows-blas:
1334
+ # runs-on: windows-latest
1335
+ #
1336
+ # strategy:
1337
+ # matrix:
1338
+ # build: [Release]
1339
+ # arch: [Win32, x64]
1340
+ # blas: [ON]
1341
+ # include:
1342
+ # - arch: Win32
1343
+ # obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
1344
+ # s2arc: x86
1345
+ # - arch: x64
1346
+ # obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
1347
+ # s2arc: x64
1348
+ #
1349
+ # steps:
1350
+ # - name: Clone
1351
+ # uses: actions/checkout@v4
1352
+ #
1353
+ # - name: Add msbuild to PATH
1354
+ # uses: microsoft/setup-msbuild@v1
1355
+ #
1356
+ # - name: Fetch OpenBLAS
1357
+ # if: matrix.blas == 'ON'
1358
+ # run: |
1359
+ # C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
1360
+ # 7z x blas.zip -oblas -y
1361
+ # copy blas/include/cblas.h .
1362
+ # copy blas/include/openblas_config.h .
1363
+ # echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
1364
+ #
1365
+ # - name: Configure
1366
+ # run: >
1367
+ # cmake -S . -B ./build -A ${{ matrix.arch }}
1368
+ # -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1369
+ # -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
1370
+ # -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
1371
+ #
1372
+ # - name: Build
1373
+ # run: |
1374
+ # cd ./build
1375
+ # msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
1376
+ #
1377
+ # - name: Copy libopenblas.dll
1378
+ # if: matrix.blas == 'ON'
1379
+ # run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
1380
+ #
1381
+ # - name: Upload binaries
1382
+ # if: matrix.blas == 'ON'
1383
+ # uses: actions/upload-artifact@v4
1384
+ # with:
1385
+ # name: llama-blas-bin-${{ matrix.arch }}
1386
+ # path: build/bin/${{ matrix.build }}
1387
+ #
1388
+ # emscripten:
1389
+ # runs-on: ubuntu-latest
1390
+ #
1391
+ # strategy:
1392
+ # matrix:
1393
+ # build: [Release]
1394
+ #
1395
+ # steps:
1396
+ # - name: Clone
1397
+ # uses: actions/checkout@v4
1398
+ #
1399
+ # - name: Dependencies
1400
+ # run: |
1401
+ # wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
1402
+ # tar -xvf master.tar.gz
1403
+ # emsdk-master/emsdk update
1404
+ # emsdk-master/emsdk install latest
1405
+ # emsdk-master/emsdk activate latest
1406
+ #
1407
+ # - name: Configure
1408
+ # run: echo "tmp"
1409
+ #
1410
+ # - name: Build
1411
+ # run: |
1412
+ # pushd emsdk-master
1413
+ # source ./emsdk_env.sh
1414
+ # popd
1415
+ # emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1416
+ # make
llama.cpp/.github/workflows/close-issue.yml ADDED
@@ -0,0 +1,28 @@
1
+ name: Close inactive issues
2
+ on:
3
+ schedule:
4
+ - cron: "42 0 * * *"
5
+
6
+ # Fine-grant permission
7
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
8
+ permissions:
9
+ issues: write
10
+
11
+ jobs:
12
+ close-issues:
13
+ runs-on: ubuntu-latest
14
+ permissions:
15
+ issues: write
16
+ pull-requests: write
17
+ steps:
18
+ - uses: actions/stale@v5
19
+ with:
20
+ exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
21
+ days-before-issue-stale: 30
22
+ days-before-issue-close: 14
23
+ stale-issue-label: "stale"
24
+ close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
25
+ days-before-pr-stale: -1
26
+ days-before-pr-close: -1
27
+ operations-per-run: 10000
28
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
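+ # issues become stale after 30 days of inactivity and are closed 14 days later; pull requests are exempt (days-before-pr-stale: -1)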