GGUF
johnbenac committed (verified) · Commit e80739d · 1 Parent(s): eed8f0a

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .clang-format +161 -0
  2. .clang-tidy +26 -0
  3. .devops/cloud-v-pipeline +22 -0
  4. .devops/cpu.Dockerfile +92 -0
  5. .devops/cuda.Dockerfile +94 -0
  6. .devops/intel.Dockerfile +91 -0
  7. .devops/llama-cli-cann.Dockerfile +44 -0
  8. .devops/llama-cpp-cuda.srpm.spec +83 -0
  9. .devops/llama-cpp.srpm.spec +85 -0
  10. .devops/musa.Dockerfile +108 -0
  11. .devops/nix/apps.nix +21 -0
  12. .devops/nix/devshells.nix +52 -0
  13. .devops/nix/docker.nix +37 -0
  14. .devops/nix/jetson-support.nix +39 -0
  15. .devops/nix/nixpkgs-instances.nix +45 -0
  16. .devops/nix/package-gguf-py.nix +36 -0
  17. .devops/nix/package.nix +247 -0
  18. .devops/nix/python-scripts.nix +66 -0
  19. .devops/nix/scope.nix +41 -0
  20. .devops/nix/sif.nix +27 -0
  21. .devops/rocm.Dockerfile +113 -0
  22. .devops/tools.sh +49 -0
  23. .devops/vulkan.Dockerfile +89 -0
  24. .dockerignore +20 -0
  25. .ecrc +6 -0
  26. .editorconfig +50 -0
  27. .flake8 +17 -0
  28. .gitattributes +23 -0
  29. .github/ISSUE_TEMPLATE/010-bug-compilation.yml +87 -0
  30. .github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
  31. .github/ISSUE_TEMPLATE/019-bug-misc.yml +91 -0
  32. .github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
  33. .github/ISSUE_TEMPLATE/030-research.yml +52 -0
  34. .github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
  35. .github/ISSUE_TEMPLATE/config.yml +11 -0
  36. .github/labeler.yml +86 -0
  37. .github/pull_request_template.md +1 -0
  38. .github/workflows/bench.yml.disabled +305 -0
  39. .github/workflows/build.yml +1756 -0
  40. .github/workflows/close-issue.yml +28 -0
  41. .github/workflows/docker.yml +175 -0
  42. .github/workflows/editorconfig.yml +29 -0
  43. .github/workflows/gguf-publish.yml +44 -0
  44. .github/workflows/labeler.yml +17 -0
  45. .github/workflows/python-check-requirements.yml +33 -0
  46. .github/workflows/python-lint.yml +30 -0
  47. .github/workflows/python-type-check.yml +40 -0
  48. .github/workflows/server.yml +241 -0
  49. .gitignore +149 -0
  50. .gitmodules +3 -0
.clang-format ADDED
@@ -0,0 +1,161 @@
1
+ ---
2
+ Language: Cpp
3
+ AlignAfterOpenBracket: Align
4
+ AlignArrayOfStructures: Left
5
+ AlignConsecutiveAssignments: AcrossComments
6
+ AlignConsecutiveBitFields: AcrossComments
7
+ AlignConsecutiveDeclarations: AcrossComments
8
+ AlignConsecutiveMacros: AcrossComments
9
+ # AlignConsecutiveShortCaseStatements: AcrossComments
10
+ AlignEscapedNewlines: Left # LeftWithLastLine
11
+ AlignOperands: Align
12
+ AlignTrailingComments:
13
+ Kind: Always
14
+ OverEmptyLines: 1
15
+ AllowAllArgumentsOnNextLine: true
16
+ AllowAllParametersOfDeclarationOnNextLine: false
17
+ # AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
18
+ AllowShortBlocksOnASingleLine: Never
19
+ AllowShortCaseLabelsOnASingleLine: false
20
+ AllowShortFunctionsOnASingleLine: Inline
21
+ AllowShortIfStatementsOnASingleLine: Never
22
+ AllowShortLambdasOnASingleLine: Inline
23
+ AllowShortLoopsOnASingleLine: false
24
+ AlwaysBreakBeforeMultilineStrings: true
25
+ BinPackArguments: true
26
+ BinPackParameters: true # OnePerLine
27
+ BitFieldColonSpacing: Both
28
+ BreakBeforeBraces: Custom # Attach
29
+ BraceWrapping:
30
+ AfterCaseLabel: true
31
+ AfterClass: false
32
+ AfterControlStatement: false
33
+ AfterEnum: false
34
+ AfterFunction: false
35
+ AfterNamespace: false
36
+ AfterObjCDeclaration: false
37
+ AfterStruct: false
38
+ AfterUnion: false
39
+ AfterExternBlock: false
40
+ BeforeCatch: false
41
+ BeforeElse: false
42
+ BeforeLambdaBody: false
43
+ BeforeWhile: false
44
+ IndentBraces: false
45
+ SplitEmptyFunction: false
46
+ SplitEmptyRecord: false
47
+ SplitEmptyNamespace: false
48
+ # BreakAdjacentStringLiterals: true
49
+ BreakAfterAttributes: Never
50
+ BreakBeforeBinaryOperators: None
51
+ BreakBeforeInlineASMColon: OnlyMultiline
52
+ BreakBeforeTernaryOperators: false
53
+ # BreakBinaryOperations: Never
54
+ BreakConstructorInitializers: AfterColon
55
+ # BreakFunctionDefinitionParameters: false
56
+ BreakInheritanceList: AfterComma
57
+ BreakStringLiterals: true
58
+ # BreakTemplateDeclarations: Yes
59
+ ColumnLimit: 120
60
+ CommentPragmas: '^ IWYU pragma:'
61
+ CompactNamespaces: false
62
+ ConstructorInitializerIndentWidth: 4
63
+ ContinuationIndentWidth: 4
64
+ Cpp11BracedListStyle: false
65
+ DerivePointerAlignment: false
66
+ DisableFormat: false
67
+ EmptyLineBeforeAccessModifier: Leave
68
+ EmptyLineAfterAccessModifier: Never
69
+ ExperimentalAutoDetectBinPacking: false
70
+ FixNamespaceComments: true
71
+ IncludeBlocks: Regroup
72
+ IncludeCategories:
73
+ - Regex: '^<.*\.h>'
74
+ Priority: 1
75
+ SortPriority: 0
76
+ - Regex: '^<.*'
77
+ Priority: 2
78
+ SortPriority: 0
79
+ - Regex: '.*'
80
+ Priority: 3
81
+ SortPriority: 0
82
+ IncludeIsMainRegex: '([-_](test|unittest))?$'
83
+ IncludeIsMainSourceRegex: ''
84
+ IndentAccessModifiers: false
85
+ IndentCaseBlocks: true
86
+ IndentCaseLabels: true
87
+ IndentExternBlock: NoIndent
88
+ IndentGotoLabels: false
89
+ IndentPPDirectives: AfterHash
90
+ IndentWidth: 4
91
+ IndentWrappedFunctionNames: false
92
+ InsertBraces: true # NOTE: may lead to incorrect formatting
93
+ InsertNewlineAtEOF: true
94
+ JavaScriptQuotes: Leave
95
+ JavaScriptWrapImports: true
96
+ KeepEmptyLinesAtTheStartOfBlocks: false
97
+ LambdaBodyIndentation: Signature
98
+ LineEnding: LF
99
+ MacroBlockBegin: ''
100
+ MacroBlockEnd: ''
101
+ MaxEmptyLinesToKeep: 1
102
+ NamespaceIndentation: None
103
+ ObjCBinPackProtocolList: Auto
104
+ ObjCBlockIndentWidth: 4
105
+ ObjCSpaceAfterProperty: true
106
+ ObjCSpaceBeforeProtocolList: true
107
+ PPIndentWidth: -1
108
+ PackConstructorInitializers: CurrentLine
109
+ PenaltyBreakAssignment: 2
110
+ PenaltyBreakBeforeFirstCallParameter: 1
111
+ PenaltyBreakComment: 300
112
+ PenaltyBreakFirstLessLess: 120
113
+ PenaltyBreakString: 1000
114
+ PenaltyBreakTemplateDeclaration: 10
115
+ PenaltyExcessCharacter: 1000000
116
+ PenaltyReturnTypeOnItsOwnLine: 200
117
+ PointerAlignment: Middle
118
+ QualifierAlignment: Left
119
+ #QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
120
+ RawStringFormats:
121
+ - Language: Cpp
122
+ Delimiters:
123
+ - cc
124
+ - CC
125
+ - cpp
126
+ - Cpp
127
+ - CPP
128
+ - 'c++'
129
+ - 'C++'
130
+ CanonicalDelimiter: ''
131
+ ReferenceAlignment: Middle
132
+ ReflowComments: false # IndentOnly
133
+ SeparateDefinitionBlocks: Always
134
+ SortIncludes: CaseInsensitive
135
+ SortUsingDeclarations: LexicographicNumeric
136
+ SpaceAfterCStyleCast: true
137
+ SpaceAfterLogicalNot: false
138
+ SpaceAfterTemplateKeyword: true
139
+ SpaceBeforeAssignmentOperators: true
140
+ SpaceBeforeCpp11BracedList: false
141
+ SpaceBeforeCtorInitializerColon: true
142
+ SpaceBeforeInheritanceColon: true
143
+ SpaceBeforeParens: ControlStatements
144
+ SpaceBeforeRangeBasedForLoopColon: true
145
+ SpaceInEmptyBlock: false
146
+ SpaceInEmptyParentheses: false
147
+ SpacesBeforeTrailingComments: 2
148
+ SpacesInAngles: Never
149
+ SpacesInContainerLiterals: true
150
+ SpacesInLineCommentPrefix:
151
+ Minimum: 1
152
+ Maximum: -1
153
+ SpacesInParentheses: false
154
+ SpacesInSquareBrackets: false
155
+ SpaceBeforeSquareBrackets: false
156
+ Standard: c++17
157
+ TabWidth: 4
158
+ UseTab: Never
159
+ WhitespaceSensitiveMacros: ['STRINGIZE']
160
+ ...
161
+
.clang-tidy ADDED
@@ -0,0 +1,26 @@
1
+ ---
2
+ Checks: >
3
+ bugprone-*,
4
+ -bugprone-easily-swappable-parameters,
5
+ -bugprone-implicit-widening-of-multiplication-result,
6
+ -bugprone-misplaced-widening-cast,
7
+ -bugprone-narrowing-conversions,
8
+ readability-*,
9
+ -readability-avoid-unconditional-preprocessor-if,
10
+ -readability-function-cognitive-complexity,
11
+ -readability-identifier-length,
12
+ -readability-implicit-bool-conversion,
13
+ -readability-magic-numbers,
14
+ -readability-uppercase-literal-suffix,
15
+ -readability-simplify-boolean-expr,
16
+ clang-analyzer-*,
17
+ -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
18
+ performance-*,
19
+ portability-*,
20
+ -portability-simd-intrinsics,
21
+ misc-*,
22
+ -misc-const-correctness,
23
+ -misc-non-private-member-variables-in-classes,
24
+ -misc-no-recursion,
25
+ -misc-use-anonymous-namespace,
26
+ FormatStyle: none
.devops/cloud-v-pipeline ADDED
@@ -0,0 +1,22 @@
1
+ node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
2
+ stage('Cleanup'){
3
+ cleanWs() // Cleaning previous CI build in workspace
4
+ }
5
+ stage('checkout repo'){
6
+ retry(5){ // Retry if the cloning fails due to some reason
7
+ checkout scm // Clone the repo on Runner
8
+ }
9
+ }
10
+ stage('Compiling llama.cpp'){
11
+ sh'''#!/bin/bash
12
+ make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
13
+ '''
14
+ }
15
+ stage('Running llama.cpp'){
16
+ sh'''#!/bin/bash
17
+ module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
18
+ qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
19
+ cat llama_log.txt # Printing results
20
+ '''
21
+ }
22
+ }
.devops/cpu.Dockerfile ADDED
@@ -0,0 +1,92 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ ARG TARGETARCH
6
+
7
+ ARG GGML_CPU_ARM_ARCH=armv8-a
8
+
9
+ RUN apt-get update && \
10
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
11
+
12
+ WORKDIR /app
13
+
14
+ COPY . .
15
+
16
+ RUN if [ "$TARGETARCH" = "amd64" ]; then \
17
+ cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
18
+ elif [ "$TARGETARCH" = "arm64" ]; then \
19
+ cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
20
+ else \
21
+ echo "Unsupported architecture"; \
22
+ exit 1; \
23
+ fi && \
24
+ cmake --build build -j $(nproc)
25
+
26
+ RUN mkdir -p /app/lib && \
27
+ find build -name "*.so" -exec cp {} /app/lib \;
28
+
29
+ RUN mkdir -p /app/full \
30
+ && cp build/bin/* /app/full \
31
+ && cp *.py /app/full \
32
+ && cp -r gguf-py /app/full \
33
+ && cp -r requirements /app/full \
34
+ && cp requirements.txt /app/full \
35
+ && cp .devops/tools.sh /app/full/tools.sh
36
+
37
+ ## Base image
38
+ FROM ubuntu:$UBUNTU_VERSION AS base
39
+
40
+ RUN apt-get update \
41
+ && apt-get install -y libgomp1 curl\
42
+ && apt autoremove -y \
43
+ && apt clean -y \
44
+ && rm -rf /tmp/* /var/tmp/* \
45
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
46
+ && find /var/cache -type f -delete
47
+
48
+ COPY --from=build /app/lib/ /app
49
+
50
+ ### Full
51
+ FROM base AS full
52
+
53
+ COPY --from=build /app/full /app
54
+
55
+ WORKDIR /app
56
+
57
+ RUN apt-get update \
58
+ && apt-get install -y \
59
+ git \
60
+ python3 \
61
+ python3-pip \
62
+ && pip install --upgrade pip setuptools wheel \
63
+ && pip install -r requirements.txt \
64
+ && apt autoremove -y \
65
+ && apt clean -y \
66
+ && rm -rf /tmp/* /var/tmp/* \
67
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
68
+ && find /var/cache -type f -delete
69
+
70
+ ENTRYPOINT ["/app/tools.sh"]
71
+
72
+ ### Light, CLI only
73
+ FROM base AS light
74
+
75
+ COPY --from=build /app/full/llama-cli /app
76
+
77
+ WORKDIR /app
78
+
79
+ ENTRYPOINT [ "/app/llama-cli" ]
80
+
81
+ ### Server, Server only
82
+ FROM base AS server
83
+
84
+ ENV LLAMA_ARG_HOST=0.0.0.0
85
+
86
+ COPY --from=build /app/full/llama-server /app
87
+
88
+ WORKDIR /app
89
+
90
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
91
+
92
+ ENTRYPOINT [ "/app/llama-server" ]
.devops/cuda.Dockerfile ADDED
@@ -0,0 +1,94 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG CUDA_VERSION=12.4.0
4
+ # Target the CUDA build image
5
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+
7
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10
+
11
+ # CUDA architecture to build for (defaults to all supported archs)
12
+ ARG CUDA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
16
+
17
+ WORKDIR /app
18
+
19
+ COPY . .
20
+
21
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
22
+ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
23
+ fi && \
24
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
25
+ cmake --build build --config Release -j$(nproc)
26
+
27
+ RUN mkdir -p /app/lib && \
28
+ find build -name "*.so" -exec cp {} /app/lib \;
29
+
30
+ RUN mkdir -p /app/full \
31
+ && cp build/bin/* /app/full \
32
+ && cp *.py /app/full \
33
+ && cp -r gguf-py /app/full \
34
+ && cp -r requirements /app/full \
35
+ && cp requirements.txt /app/full \
36
+ && cp .devops/tools.sh /app/full/tools.sh
37
+
38
+ ## Base image
39
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS base
40
+
41
+ RUN apt-get update \
42
+ && apt-get install -y libgomp1 curl\
43
+ && apt autoremove -y \
44
+ && apt clean -y \
45
+ && rm -rf /tmp/* /var/tmp/* \
46
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
47
+ && find /var/cache -type f -delete
48
+
49
+ COPY --from=build /app/lib/ /app
50
+
51
+ ### Full
52
+ FROM base AS full
53
+
54
+ COPY --from=build /app/full /app
55
+
56
+ WORKDIR /app
57
+
58
+ RUN apt-get update \
59
+ && apt-get install -y \
60
+ git \
61
+ python3 \
62
+ python3-pip \
63
+ && pip install --upgrade pip setuptools wheel \
64
+ && pip install -r requirements.txt \
65
+ && apt autoremove -y \
66
+ && apt clean -y \
67
+ && rm -rf /tmp/* /var/tmp/* \
68
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
69
+ && find /var/cache -type f -delete
70
+
71
+
72
+ ENTRYPOINT ["/app/tools.sh"]
73
+
74
+ ### Light, CLI only
75
+ FROM base AS light
76
+
77
+ COPY --from=build /app/full/llama-cli /app
78
+
79
+ WORKDIR /app
80
+
81
+ ENTRYPOINT [ "/app/llama-cli" ]
82
+
83
+ ### Server, Server only
84
+ FROM base AS server
85
+
86
+ ENV LLAMA_ARG_HOST=0.0.0.0
87
+
88
+ COPY --from=build /app/full/llama-server /app
89
+
90
+ WORKDIR /app
91
+
92
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
93
+
94
+ ENTRYPOINT [ "/app/llama-server" ]
.devops/intel.Dockerfile ADDED
@@ -0,0 +1,91 @@
1
+ ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04
2
+
3
+ ## Build Image
4
+
5
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
6
+
7
+ ARG GGML_SYCL_F16=OFF
8
+ RUN apt-get update && \
9
+ apt-get install -y git libcurl4-openssl-dev
10
+
11
+ WORKDIR /app
12
+
13
+ COPY . .
14
+
15
+ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
16
+ echo "GGML_SYCL_F16 is set" \
17
+ && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
18
+ fi && \
19
+ echo "Building with dynamic libs" && \
20
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
21
+ cmake --build build --config Release -j$(nproc)
22
+
23
+ RUN mkdir -p /app/lib && \
24
+ find build -name "*.so" -exec cp {} /app/lib \;
25
+
26
+ RUN mkdir -p /app/full \
27
+ && cp build/bin/* /app/full \
28
+ && cp *.py /app/full \
29
+ && cp -r gguf-py /app/full \
30
+ && cp -r requirements /app/full \
31
+ && cp requirements.txt /app/full \
32
+ && cp .devops/tools.sh /app/full/tools.sh
33
+
34
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
35
+
36
+ RUN apt-get update \
37
+ && apt-get install -y libgomp1 curl\
38
+ && apt autoremove -y \
39
+ && apt clean -y \
40
+ && rm -rf /tmp/* /var/tmp/* \
41
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
42
+ && find /var/cache -type f -delete
43
+
44
+ ### Full
45
+ FROM base AS full
46
+
47
+ COPY --from=build /app/lib/ /app
48
+ COPY --from=build /app/full /app
49
+
50
+ WORKDIR /app
51
+
52
+ RUN apt-get update \
53
+ && apt-get install -y \
54
+ git \
55
+ python3 \
56
+ python3-pip \
57
+ && pip install --upgrade pip setuptools wheel \
58
+ && pip install -r requirements.txt \
59
+ && apt autoremove -y \
60
+ && apt clean -y \
61
+ && rm -rf /tmp/* /var/tmp/* \
62
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
63
+ && find /var/cache -type f -delete
64
+
65
+
66
+ ENTRYPOINT ["/app/tools.sh"]
67
+
68
+ ### Light, CLI only
69
+ FROM base AS light
70
+
71
+ COPY --from=build /app/lib/ /app
72
+ COPY --from=build /app/full/llama-cli /app
73
+
74
+ WORKDIR /app
75
+
76
+ ENTRYPOINT [ "/app/llama-cli" ]
77
+
78
+ ### Server, Server only
79
+ FROM base AS server
80
+
81
+ ENV LLAMA_ARG_HOST=0.0.0.0
82
+
83
+ COPY --from=build /app/lib/ /app
84
+ COPY --from=build /app/full/llama-server /app
85
+
86
+ WORKDIR /app
87
+
88
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
89
+
90
+ ENTRYPOINT [ "/app/llama-server" ]
91
+
.devops/llama-cli-cann.Dockerfile ADDED
@@ -0,0 +1,44 @@
1
+ ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
2
+
3
+ FROM ascendai/cann:$ASCEND_VERSION AS build
4
+
5
+ WORKDIR /app
6
+
7
+ COPY . .
8
+
9
+ RUN yum install -y gcc g++ cmake make
10
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
11
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
12
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
13
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
14
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
15
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
16
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
17
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
18
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
19
+
20
+ # find libascend_hal.so, because the drive hasn`t been mounted.
21
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
22
+
23
+ RUN echo "Building with static libs" && \
24
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
25
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
26
+ cmake --build build --config Release --target llama-cli
27
+
28
+ # TODO: use image with NNRT
29
+ FROM ascendai/cann:$ASCEND_VERSION AS runtime
30
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
31
+
32
+ ENV LC_ALL=C.utf8
33
+
34
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
35
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
36
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
37
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
38
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
39
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
40
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
41
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
42
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
43
+
44
+ ENTRYPOINT ["/llama-cli" ]
.devops/llama-cpp-cuda.srpm.spec ADDED
@@ -0,0 +1,83 @@
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - [email protected]
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
10
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
11
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
12
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
13
+ # It is up to the user to install the correct vendor-specific support.
14
+
15
+ Name: llama.cpp-cuda
16
+ Version: %( date "+%%Y%%m%%d" )
17
+ Release: 1%{?dist}
18
+ Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
19
+ License: MIT
20
+ Source0: https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
21
+ BuildRequires: coreutils make gcc-c++ git cuda-toolkit
22
+ Requires: cuda-toolkit
23
+ URL: https://github.com/ggml-org/llama.cpp
24
+
25
+ %define debug_package %{nil}
26
+ %define source_date_epoch_from_changelog 0
27
+
28
+ %description
29
+ CPU inference for Meta's Lllama2 models using default options.
30
+
31
+ %prep
32
+ %setup -n llama.cpp-master
33
+
34
+ %build
35
+ make -j GGML_CUDA=1
36
+
37
+ %install
38
+ mkdir -p %{buildroot}%{_bindir}/
39
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
40
+ cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
41
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
42
+
43
+ mkdir -p %{buildroot}/usr/lib/systemd/system
44
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
45
+ [Unit]
46
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
47
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
48
+
49
+ [Service]
50
+ Type=simple
51
+ EnvironmentFile=/etc/sysconfig/llama
52
+ ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
53
+ ExecReload=/bin/kill -s HUP $MAINPID
54
+ Restart=never
55
+
56
+ [Install]
57
+ WantedBy=default.target
58
+ EOF
59
+
60
+ mkdir -p %{buildroot}/etc/sysconfig
61
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
62
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
63
+ EOF
64
+
65
+ %clean
66
+ rm -rf %{buildroot}
67
+ rm -rf %{_builddir}/*
68
+
69
+ %files
70
+ %{_bindir}/llama-cuda-cli
71
+ %{_bindir}/llama-cuda-server
72
+ %{_bindir}/llama-cuda-simple
73
+ /usr/lib/systemd/system/llamacuda.service
74
+ %config /etc/sysconfig/llama
75
+
76
+ %pre
77
+
78
+ %post
79
+
80
+ %preun
81
+ %postun
82
+
83
+ %changelog
.devops/llama-cpp.srpm.spec ADDED
@@ -0,0 +1,85 @@
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - [email protected]
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # In the meantime, YYYYMMDD format will be used.
10
+ # 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
11
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
12
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
13
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
14
+ # It is up to the user to install the correct vendor-specific support.
15
+
16
+ Name: llama.cpp
17
+ Version: %( date "+%%Y%%m%%d" )
18
+ Release: 1%{?dist}
19
+ Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
20
+ License: MIT
21
+ Source0: https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
22
+ BuildRequires: coreutils make gcc-c++ git libstdc++-devel
23
+ Requires: libstdc++
24
+ URL: https://github.com/ggml-org/llama.cpp
25
+
26
+ %define debug_package %{nil}
27
+ %define source_date_epoch_from_changelog 0
28
+
29
+ %description
30
+ CPU inference for Meta's Lllama2 models using default options.
31
+ Models are not included in this package and must be downloaded separately.
32
+
33
+ %prep
34
+ %setup -n llama.cpp-master
35
+
36
+ %build
37
+ make -j
38
+
39
+ %install
40
+ mkdir -p %{buildroot}%{_bindir}/
41
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
42
+ cp -p llama-server %{buildroot}%{_bindir}/llama-server
43
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
44
+
45
+ mkdir -p %{buildroot}/usr/lib/systemd/system
46
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
47
+ [Unit]
48
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
49
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
50
+
51
+ [Service]
52
+ Type=simple
53
+ EnvironmentFile=/etc/sysconfig/llama
54
+ ExecStart=/usr/bin/llama-server $LLAMA_ARGS
55
+ ExecReload=/bin/kill -s HUP $MAINPID
56
+ Restart=never
57
+
58
+ [Install]
59
+ WantedBy=default.target
60
+ EOF
61
+
62
+ mkdir -p %{buildroot}/etc/sysconfig
63
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
64
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
65
+ EOF
66
+
67
+ %clean
68
+ rm -rf %{buildroot}
69
+ rm -rf %{_builddir}/*
70
+
71
+ %files
72
+ %{_bindir}/llama-cli
73
+ %{_bindir}/llama-server
74
+ %{_bindir}/llama-simple
75
+ /usr/lib/systemd/system/llama.service
76
+ %config /etc/sysconfig/llama
77
+
78
+ %pre
79
+
80
+ %post
81
+
82
+ %preun
83
+ %postun
84
+
85
+ %changelog
.devops/musa.Dockerfile ADDED
@@ -0,0 +1,108 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG MUSA_VERSION=rc3.1.1
4
+ # Target the MUSA build image
5
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+
7
+ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
10
+
11
+ # MUSA architecture to build for (defaults to all supported archs)
12
+ ARG MUSA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y \
16
+ build-essential \
17
+ cmake \
18
+ python3 \
19
+ python3-pip \
20
+ git \
21
+ libcurl4-openssl-dev \
22
+ libgomp1
23
+
24
+ COPY requirements.txt requirements.txt
25
+ COPY requirements requirements
26
+
27
+ RUN pip install --upgrade pip setuptools wheel \
28
+ && pip install -r requirements.txt
29
+
30
+ WORKDIR /app
31
+
32
+ COPY . .
33
+
34
+ # Use the default MUSA archs if not specified
35
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
36
+ export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
37
+ fi && \
38
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
39
+ cmake --build build --config Release -j$(nproc)
40
+
41
+ RUN mkdir -p /app/lib && \
42
+ find build -name "*.so" -exec cp {} /app/lib \;
43
+
44
+ RUN mkdir -p /app/full \
45
+ && cp build/bin/* /app/full \
46
+ && cp *.py /app/full \
47
+ && cp -r gguf-py /app/full \
48
+ && cp -r requirements /app/full \
49
+ && cp requirements.txt /app/full \
50
+ && cp .devops/tools.sh /app/full/tools.sh
51
+
52
+ ## Base image
53
+ FROM ${BASE_MUSA_RUN_CONTAINER} AS base
54
+
55
+ RUN apt-get update \
56
+ && apt-get install -y libgomp1 curl\
57
+ && apt autoremove -y \
58
+ && apt clean -y \
59
+ && rm -rf /tmp/* /var/tmp/* \
60
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
61
+ && find /var/cache -type f -delete
62
+
63
+ COPY --from=build /app/lib/ /app
64
+
65
+ ### Full
66
+ FROM base AS full
67
+
68
+ COPY --from=build /app/full /app
69
+
70
+ WORKDIR /app
71
+
72
+ RUN apt-get update \
73
+ && apt-get install -y \
74
+ git \
75
+ python3 \
76
+ python3-pip \
77
+ && pip install --upgrade pip setuptools wheel \
78
+ && pip install -r requirements.txt \
79
+ && apt autoremove -y \
80
+ && apt clean -y \
81
+ && rm -rf /tmp/* /var/tmp/* \
82
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
83
+ && find /var/cache -type f -delete
84
+
85
+
86
+ ENTRYPOINT ["/app/tools.sh"]
87
+
88
+ ### Light, CLI only
89
+ FROM base AS light
90
+
91
+ COPY --from=build /app/full/llama-cli /app
92
+
93
+ WORKDIR /app
94
+
95
+ ENTRYPOINT [ "/app/llama-cli" ]
96
+
97
+ ### Server, Server only
98
+ FROM base AS server
99
+
100
+ ENV LLAMA_ARG_HOST=0.0.0.0
101
+
102
+ COPY --from=build /app/full/llama-server /app
103
+
104
+ WORKDIR /app
105
+
106
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
107
+
108
+ ENTRYPOINT [ "/app/llama-server" ]
.devops/nix/apps.nix ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ perSystem =
3
+ { config, lib, ... }:
4
+ {
5
+ apps =
6
+ let
7
+ inherit (config.packages) default;
8
+ binaries = [
9
+ "llama-cli"
10
+ "llama-embedding"
11
+ "llama-server"
12
+ "llama-quantize"
13
+ ];
14
+ mkApp = name: {
15
+ type = "app";
16
+ program = "${default}/bin/${name}";
17
+ };
18
+ in
19
+ lib.genAttrs binaries mkApp;
20
+ };
21
+ }
.devops/nix/devshells.nix ADDED
@@ -0,0 +1,52 @@
1
+ { inputs, ... }:
2
+
3
+ {
4
+ perSystem =
5
+ {
6
+ config,
7
+ lib,
8
+ system,
9
+ ...
10
+ }:
11
+ {
12
+ devShells =
13
+ let
14
+ pkgs = import inputs.nixpkgs { inherit system; };
15
+ stdenv = pkgs.stdenv;
16
+ scripts = config.packages.python-scripts;
17
+ in
18
+ lib.pipe (config.packages) [
19
+ (lib.concatMapAttrs (
20
+ name: package: {
21
+ ${name} = pkgs.mkShell {
22
+ name = "${name}";
23
+ inputsFrom = [ package ];
24
+ shellHook = ''
25
+ echo "Entering ${name} devShell"
26
+ '';
27
+ };
28
+ "${name}-extra" =
29
+ if (name == "python-scripts") then
30
+ null
31
+ else
32
+ pkgs.mkShell {
33
+ name = "${name}-extra";
34
+ inputsFrom = [
35
+ package
36
+ scripts
37
+ ];
38
+ # Extra packages that *may* be used by some scripts
39
+ packages = [
40
+ pkgs.python3Packages.tiktoken
41
+ ];
42
+ shellHook = ''
43
+ echo "Entering ${name} devShell"
44
+ addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
45
+ '';
46
+ };
47
+ }
48
+ ))
49
+ (lib.filterAttrs (name: value: value != null))
50
+ ];
51
+ };
52
+ }
.devops/nix/docker.nix ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ lib,
3
+ dockerTools,
4
+ buildEnv,
5
+ llama-cpp,
6
+ interactive ? true,
7
+ coreutils,
8
+ }:
9
+
10
+ # A tar that can be fed into `docker load`:
11
+ #
12
+ # $ nix build .#llamaPackages.docker
13
+ # $ docker load < result
14
+
15
+ # For details and variations cf.
16
+ # - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
17
+ # - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
18
+ # - https://nixery.dev/
19
+
20
+ # Approximate (compressed) sizes, at the time of writing, are:
21
+ #
22
+ # .#llamaPackages.docker: 125M;
23
+ # .#llamaPackagesCuda.docker: 537M;
24
+ # .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
25
+
26
+ dockerTools.buildLayeredImage {
27
+ name = llama-cpp.pname;
28
+ tag = "latest";
29
+
30
+ contents =
31
+ [ llama-cpp ]
32
+ ++ lib.optionals interactive [
33
+ coreutils
34
+ dockerTools.binSh
35
+ dockerTools.caCertificates
36
+ ];
37
+ }
.devops/nix/jetson-support.nix ADDED
@@ -0,0 +1,39 @@
1
+ { inputs, ... }:
2
+ {
3
+ perSystem =
4
+ {
5
+ config,
6
+ system,
7
+ lib,
8
+ pkgsCuda,
9
+ ...
10
+ }:
11
+ {
12
+ legacyPackages =
13
+ let
14
+ caps.llamaPackagesXavier = "7.2";
15
+ caps.llamaPackagesOrin = "8.7";
16
+ caps.llamaPackagesTX2 = "6.2";
17
+ caps.llamaPackagesNano = "5.3";
18
+
19
+ pkgsFor =
20
+ cap:
21
+ import inputs.nixpkgs {
22
+ inherit system;
23
+ config = {
24
+ cudaSupport = true;
25
+ cudaCapabilities = [ cap ];
26
+ cudaEnableForwardCompat = false;
27
+ inherit (pkgsCuda.config) allowUnfreePredicate;
28
+ };
29
+ };
30
+ in
31
+ builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
32
+
33
+ packages = lib.optionalAttrs (system == "aarch64-linux") {
34
+ jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
35
+ jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
36
+ jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
37
+ };
38
+ };
39
+ }
.devops/nix/nixpkgs-instances.nix ADDED
@@ -0,0 +1,45 @@
1
+ { inputs, ... }:
2
+ {
3
+ # The _module.args definitions are passed on to modules as arguments. E.g.
4
+ # the module `{ pkgs ... }: { /* config */ }` implicitly uses
5
+ # `_module.args.pkgs` (defined in this case by flake-parts).
6
+ perSystem =
7
+ { system, ... }:
8
+ {
9
+ _module.args = {
10
+ # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
11
+ # again, the below creates several nixpkgs instances which the
12
+ # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
13
+ #
14
+ # This is currently "slow" and "expensive", on a certain scale.
15
+ # This also isn't "right" in that this hinders dependency injection at
16
+ # the level of flake inputs. This might get removed in the foreseeable
17
+ # future.
18
+ #
19
+ # Note that you can use these expressions without Nix
20
+ # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
21
+
22
+ pkgsCuda = import inputs.nixpkgs {
23
+ inherit system;
24
+ # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
25
+ # and ucx are built with CUDA support)
26
+ config.cudaSupport = true;
27
+ config.allowUnfreePredicate =
28
+ p:
29
+ builtins.all (
30
+ license:
31
+ license.free
32
+ || builtins.elem license.shortName [
33
+ "CUDA EULA"
34
+ "cuDNN EULA"
35
+ ]
36
+ ) (p.meta.licenses or [ p.meta.license ]);
37
+ };
38
+ # Ensure dependencies use ROCm consistently
39
+ pkgsRocm = import inputs.nixpkgs {
40
+ inherit system;
41
+ config.rocmSupport = true;
42
+ };
43
+ };
44
+ };
45
+ }
.devops/nix/package-gguf-py.nix ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ lib,
3
+ llamaVersion,
4
+ numpy,
5
+ tqdm,
6
+ sentencepiece,
7
+ pyyaml,
8
+ poetry-core,
9
+ buildPythonPackage,
10
+ pytestCheckHook,
11
+ }:
12
+
13
+ buildPythonPackage {
14
+ pname = "gguf";
15
+ version = llamaVersion;
16
+ pyproject = true;
17
+ nativeBuildInputs = [ poetry-core ];
18
+ propagatedBuildInputs = [
19
+ numpy
20
+ tqdm
21
+ sentencepiece
22
+ pyyaml
23
+ ];
24
+ src = lib.cleanSource ../../gguf-py;
25
+ pythonImportsCheck = [
26
+ "numpy"
27
+ "gguf"
28
+ ];
29
+ nativeCheckInputs = [ pytestCheckHook ];
30
+ doCheck = true;
31
+ meta = with lib; {
32
+ description = "Python package for writing binary files in the GGUF format";
33
+ license = licenses.mit;
34
+ maintainers = [ maintainers.ditsuke ];
35
+ };
36
+ }
.devops/nix/package.nix ADDED
@@ -0,0 +1,247 @@
1
+ {
2
+ lib,
3
+ glibc,
4
+ config,
5
+ stdenv,
6
+ runCommand,
7
+ cmake,
8
+ ninja,
9
+ pkg-config,
10
+ git,
11
+ mpi,
12
+ blas,
13
+ cudaPackages,
14
+ autoAddDriverRunpath,
15
+ darwin,
16
+ rocmPackages,
17
+ vulkan-headers,
18
+ vulkan-loader,
19
+ curl,
20
+ shaderc,
21
+ useBlas ?
22
+ builtins.all (x: !x) [
23
+ useCuda
24
+ useMetalKit
25
+ useRocm
26
+ useVulkan
27
+ ]
28
+ && blas.meta.available,
29
+ useCuda ? config.cudaSupport,
30
+ useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
31
+ # Increases the runtime closure size by ~700M
32
+ useMpi ? false,
33
+ useRocm ? config.rocmSupport,
34
+ rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
35
+ enableCurl ? true,
36
+ useVulkan ? false,
37
+ llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
38
+
39
+ # It's necessary to consistently use backendStdenv when building with CUDA support,
40
+ # otherwise we get libstdc++ errors downstream.
41
+ effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
42
+ enableStatic ? effectiveStdenv.hostPlatform.isStatic,
43
+ precompileMetalShaders ? false,
44
+ }:
45
+
46
+ let
47
+ inherit (lib)
48
+ cmakeBool
49
+ cmakeFeature
50
+ optionals
51
+ strings
52
+ ;
53
+
54
+ stdenv = throw "Use effectiveStdenv instead";
55
+
56
+ suffices =
57
+ lib.optionals useBlas [ "BLAS" ]
58
+ ++ lib.optionals useCuda [ "CUDA" ]
59
+ ++ lib.optionals useMetalKit [ "MetalKit" ]
60
+ ++ lib.optionals useMpi [ "MPI" ]
61
+ ++ lib.optionals useRocm [ "ROCm" ]
62
+ ++ lib.optionals useVulkan [ "Vulkan" ];
63
+
64
+ pnameSuffix =
65
+ strings.optionalString (suffices != [ ])
66
+ "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
67
+ descriptionSuffix = strings.optionalString (
68
+ suffices != [ ]
69
+ ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
70
+
71
+ xcrunHost = runCommand "xcrunHost" { } ''
72
+ mkdir -p $out/bin
73
+ ln -s /usr/bin/xcrun $out/bin
74
+ '';
75
+
76
+ # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
77
+ # separately
78
+ darwinBuildInputs =
79
+ with darwin.apple_sdk.frameworks;
80
+ [
81
+ Accelerate
82
+ CoreVideo
83
+ CoreGraphics
84
+ ]
85
+ ++ optionals useMetalKit [ MetalKit ];
86
+
87
+ cudaBuildInputs = with cudaPackages; [
88
+ cuda_cudart
89
+ cuda_cccl # <nv/target>
90
+ libcublas
91
+ ];
92
+
93
+ rocmBuildInputs = with rocmPackages; [
94
+ clr
95
+ hipblas
96
+ rocblas
97
+ ];
98
+
99
+ vulkanBuildInputs = [
100
+ vulkan-headers
101
+ vulkan-loader
102
+ shaderc
103
+ ];
104
+ in
105
+
106
+ effectiveStdenv.mkDerivation (finalAttrs: {
107
+ pname = "llama-cpp${pnameSuffix}";
108
+ version = llamaVersion;
109
+
110
+ # Note: none of the files discarded here are visible in the sandbox or
111
+ # affect the output hash. This also means they can be modified without
112
+ # triggering a rebuild.
113
+ src = lib.cleanSourceWith {
114
+ filter =
115
+ name: type:
116
+ let
117
+ noneOf = builtins.all (x: !x);
118
+ baseName = baseNameOf name;
119
+ in
120
+ noneOf [
121
+ (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
122
+ (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
123
+ (lib.hasPrefix "." baseName) # Skip hidden files and directories
124
+ (baseName == "flake.lock")
125
+ ];
126
+ src = lib.cleanSource ../../.;
127
+ };
128
+
129
+ postPatch = ''
130
+ substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
131
+ --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
132
+ substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
133
+ --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
134
+ '';
135
+
136
+ # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
137
+ # `default.metallib` may be compiled with Metal compiler from XCode
138
+ # and we need to escape sandbox on MacOS to access Metal compiler.
139
+ # `xcrun` is used find the path of the Metal compiler, which is varible
140
+ # and not on $PATH
141
+ # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion
142
+ __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
143
+
144
+ nativeBuildInputs =
145
+ [
146
+ cmake
147
+ ninja
148
+ pkg-config
149
+ git
150
+ ]
151
+ ++ optionals useCuda [
152
+ cudaPackages.cuda_nvcc
153
+
154
+ autoAddDriverRunpath
155
+ ]
156
+ ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
157
+ ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
158
+
159
+ buildInputs =
160
+ optionals effectiveStdenv.isDarwin darwinBuildInputs
161
+ ++ optionals useCuda cudaBuildInputs
162
+ ++ optionals useMpi [ mpi ]
163
+ ++ optionals useRocm rocmBuildInputs
164
+ ++ optionals useBlas [ blas ]
165
+ ++ optionals useVulkan vulkanBuildInputs
166
+ ++ optionals enableCurl [ curl ];
167
+
168
+ cmakeFlags =
169
+ [
170
+ (cmakeBool "LLAMA_BUILD_SERVER" true)
171
+ (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
172
+ (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
173
+ (cmakeBool "LLAMA_CURL" enableCurl)
174
+ (cmakeBool "GGML_NATIVE" false)
175
+ (cmakeBool "GGML_BLAS" useBlas)
176
+ (cmakeBool "GGML_CUDA" useCuda)
177
+ (cmakeBool "GGML_HIP" useRocm)
178
+ (cmakeBool "GGML_METAL" useMetalKit)
179
+ (cmakeBool "GGML_VULKAN" useVulkan)
180
+ (cmakeBool "GGML_STATIC" enableStatic)
181
+ ]
182
+ ++ optionals useCuda [
183
+ (
184
+ with cudaPackages.flags;
185
+ cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
186
+ builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
187
+ )
188
+ )
189
+ ]
190
+ ++ optionals useRocm [
191
+ (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
192
+ (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
193
+ ]
194
+ ++ optionals useMetalKit [
195
+ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
196
+ (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
197
+ ];
198
+
199
+ # Environment variables needed for ROCm
200
+ env = optionals useRocm {
201
+ ROCM_PATH = "${rocmPackages.clr}";
202
+ HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
203
+ };
204
+
205
+ # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
206
+ # if they haven't been added yet.
207
+ postInstall = ''
208
+ mkdir -p $out/include
209
+ cp $src/include/llama.h $out/include/
210
+ '';
211
+
212
+ meta = {
213
+ # Configurations we don't want even the CI to evaluate. Results in the
214
+ # "unsupported platform" messages. This is mostly a no-op, because
215
+ # cudaPackages would've refused to evaluate anyway.
216
+ badPlatforms = optionals useCuda lib.platforms.darwin;
217
+
218
+ # Configurations that are known to result in build failures. Can be
219
+ # overridden by importing Nixpkgs with `allowBroken = true`.
220
+ broken = (useMetalKit && !effectiveStdenv.isDarwin);
221
+
222
+ description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
223
+ homepage = "https://github.com/ggml-org/llama.cpp/";
224
+ license = lib.licenses.mit;
225
+
226
+ # Accommodates `nix run` and `lib.getExe`
227
+ mainProgram = "llama-cli";
228
+
229
+ # These people might respond, on the best effort basis, if you ping them
230
+ # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
231
+ # Consider adding yourself to this list if you want to ensure this flake
232
+ # stays maintained and you're willing to invest your time. Do not add
233
+ # other people without their consent. Consider removing people after
234
+ # they've been unreachable for long periods of time.
235
+
236
+ # Note that lib.maintainers is defined in Nixpkgs, but you may just add
237
+ # an attrset following the same format as in
238
+ # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
239
+ maintainers = with lib.maintainers; [
240
+ philiptaron
241
+ SomeoneSerge
242
+ ];
243
+
244
+ # Extend `badPlatforms` instead
245
+ platforms = lib.platforms.all;
246
+ };
247
+ })
.devops/nix/python-scripts.nix ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ lib,
3
+ stdenv,
4
+ buildPythonPackage,
5
+ poetry-core,
6
+ mkShell,
7
+ python3Packages,
8
+ gguf-py,
9
+ }@inputs:
10
+
11
+ let
12
+ llama-python-deps = with python3Packages; [
13
+ numpy
14
+ sentencepiece
15
+ transformers
16
+ protobuf
17
+ torchWithoutCuda
18
+ gguf-py
19
+ tqdm
20
+
21
+ # for scripts/compare-llama-bench.py
22
+ gitpython
23
+ tabulate
24
+
25
+ # for examples/pydantic-models-to-grammar-examples.py
26
+ docstring-parser
27
+ pydantic
28
+
29
+ ];
30
+
31
+ llama-python-test-deps = with python3Packages; [
32
+ # Server bench
33
+ matplotlib
34
+
35
+ # server tests
36
+ openai
37
+ pytest
38
+ prometheus-client
39
+ ];
40
+ in
41
+
42
+ buildPythonPackage ({
43
+ pname = "llama-scripts";
44
+ version = "0.0.0";
45
+ pyproject = true;
46
+
47
+ # NOTE: The files filtered out here are not visible in the build sandbox, neither
48
+ # do they affect the output hash. They can be modified without triggering a rebuild.
49
+ src = lib.cleanSourceWith {
50
+ filter =
51
+ name: type:
52
+ let
53
+ any = builtins.any (x: x);
54
+ baseName = builtins.baseNameOf name;
55
+ in
56
+ any [
57
+ (lib.hasSuffix ".py" name)
58
+ (baseName == "README.md")
59
+ (baseName == "pyproject.toml")
60
+ ];
61
+ src = lib.cleanSource ../../.;
62
+ };
63
+ nativeBuildInputs = [ poetry-core ];
64
+ nativeCheckInputs = llama-python-test-deps;
65
+ dependencies = llama-python-deps;
66
+ })
.devops/nix/scope.nix ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ lib,
3
+ newScope,
4
+ python3,
5
+ llamaVersion ? "0.0.0",
6
+ }:
7
+
8
+ let
9
+ pythonPackages = python3.pkgs;
10
+ buildPythonPackage = pythonPackages.buildPythonPackage;
11
+ numpy = pythonPackages.numpy;
12
+ tqdm = pythonPackages.tqdm;
13
+ sentencepiece = pythonPackages.sentencepiece;
14
+ pyyaml = pythonPackages.pyyaml;
15
+ poetry-core = pythonPackages.poetry-core;
16
+ pytestCheckHook = pythonPackages.pytestCheckHook;
17
+ in
18
+
19
+ # We're using `makeScope` instead of just writing out an attrset
20
+ # because it allows users to apply overlays later using `overrideScope'`.
21
+ # Cf. https://noogle.dev/f/lib/makeScope
22
+
23
+ lib.makeScope newScope (self: {
24
+ inherit llamaVersion;
25
+ gguf-py = self.callPackage ./package-gguf-py.nix {
26
+ inherit
27
+ buildPythonPackage
28
+ numpy
29
+ tqdm
30
+ sentencepiece
31
+ poetry-core
32
+ pyyaml
33
+ pytestCheckHook
34
+ ;
35
+ };
36
+ python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
37
+ llama-cpp = self.callPackage ./package.nix { };
38
+ docker = self.callPackage ./docker.nix { };
39
+ docker-min = self.callPackage ./docker.nix { interactive = false; };
40
+ sif = self.callPackage ./sif.nix { };
41
+ })
.devops/nix/sif.nix ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ lib,
3
+ singularity-tools,
4
+ llama-cpp,
5
+ bashInteractive,
6
+ interactive ? false,
7
+ }:
8
+
9
+ let
10
+ optionalInt = cond: x: if cond then x else 0;
11
+ in
12
+ singularity-tools.buildImage rec {
13
+ inherit (llama-cpp) name;
14
+ contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
15
+
16
+ # These are excessive (but safe) for most variants. Building singularity
17
+ # images requires superuser privileges, so we build them inside a VM in a
18
+ # writable image of pre-determined size.
19
+ #
20
+ # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
21
+ #
22
+ # Expected image sizes:
23
+ # - cpu/blas: 150M,
24
+ # - cuda, all gencodes: 560M,
25
+ diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
26
+ memSize = diskSize;
27
+ }
.devops/rocm.Dockerfile ADDED
@@ -0,0 +1,113 @@
1
+ ARG UBUNTU_VERSION=24.04
2
+
3
+ # This needs to generally match the container host's environment.
4
+ ARG ROCM_VERSION=6.3
5
+ ARG AMDGPU_VERSION=6.3
6
+
7
+ # Target the CUDA build image
8
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
9
+
10
+ ### Build image
11
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
12
+
13
+ # Unless otherwise specified, we make a fat build.
14
+ # List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
15
+ # This is mostly tied to rocBLAS supported archs.
16
+ # gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
17
+ # gfx906 is deprecated
18
+ #check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
19
+
20
+ #ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
21
+ ARG ROCM_DOCKER_ARCH=gfx1100
22
+
23
+ # Set nvcc architectured
24
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
25
+ # Enable ROCm
26
+ # ENV CC=/opt/rocm/llvm/bin/clang
27
+ # ENV CXX=/opt/rocm/llvm/bin/clang++
28
+
29
+ RUN apt-get update \
30
+ && apt-get install -y \
31
+ build-essential \
32
+ cmake \
33
+ git \
34
+ libcurl4-openssl-dev \
35
+ curl \
36
+ libgomp1
37
+
38
+ WORKDIR /app
39
+
40
+ COPY . .
41
+
42
+ RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
43
+ cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
44
+ && cmake --build build --config Release -j$(nproc)
45
+
46
+ RUN mkdir -p /app/lib \
47
+ && find build -name "*.so" -exec cp {} /app/lib \;
48
+
49
+ RUN mkdir -p /app/full \
50
+ && cp build/bin/* /app/full \
51
+ && cp *.py /app/full \
52
+ && cp -r gguf-py /app/full \
53
+ && cp -r requirements /app/full \
54
+ && cp requirements.txt /app/full \
55
+ && cp .devops/tools.sh /app/full/tools.sh
56
+
57
+ ## Base image
58
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS base
59
+
60
+ RUN apt-get update \
61
+ && apt-get install -y libgomp1 curl\
62
+ && apt autoremove -y \
63
+ && apt clean -y \
64
+ && rm -rf /tmp/* /var/tmp/* \
65
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
66
+ && find /var/cache -type f -delete
67
+
68
+ COPY --from=build /app/lib/ /app
69
+
70
+ ### Full
71
+ FROM base AS full
72
+
73
+ COPY --from=build /app/full /app
74
+
75
+ WORKDIR /app
76
+
77
+ RUN apt-get update \
78
+ && apt-get install -y \
79
+ git \
80
+ python3-pip \
81
+ python3 \
82
+ python3-wheel\
83
+ && pip install --break-system-packages --upgrade setuptools \
84
+ && pip install --break-system-packages -r requirements.txt \
85
+ && apt autoremove -y \
86
+ && apt clean -y \
87
+ && rm -rf /tmp/* /var/tmp/* \
88
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
89
+ && find /var/cache -type f -delete
90
+
91
+ ENTRYPOINT ["/app/tools.sh"]
92
+
93
+ ### Light, CLI only
94
+ FROM base AS light
95
+
96
+ COPY --from=build /app/full/llama-cli /app
97
+
98
+ WORKDIR /app
99
+
100
+ ENTRYPOINT [ "/app/llama-cli" ]
101
+
102
+ ### Server, Server only
103
+ FROM base AS server
104
+
105
+ ENV LLAMA_ARG_HOST=0.0.0.0
106
+
107
+ COPY --from=build /app/full/llama-server /app
108
+
109
+ WORKDIR /app
110
+
111
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
112
+
113
+ ENTRYPOINT [ "/app/llama-server" ]
.devops/tools.sh ADDED
@@ -0,0 +1,49 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ # Read the first argument into a variable
5
+ arg1="$1"
6
+
7
+ # Shift the arguments to remove the first one
8
+ shift
9
+
10
+ if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
11
+ exec python3 ./convert_hf_to_gguf.py "$@"
12
+ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
13
+ exec ./llama-quantize "$@"
14
+ elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
15
+ exec ./llama-cli "$@"
16
+ elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
17
+ exec ./llama-bench "$@"
18
+ elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
19
+ exec ./llama-perplexity "$@"
20
+ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
21
+ echo "Converting PTH to GGML..."
22
+ for i in $(ls $1/$2/ggml-model-f16.bin*); do
23
+ if [ -f "${i/f16/q4_0}" ]; then
24
+ echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
25
+ else
26
+ echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
27
+ exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
28
+ fi
29
+ done
30
+ elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
31
+ exec ./llama-server "$@"
32
+ else
33
+ echo "Unknown command: $arg1"
34
+ echo "Available commands: "
35
+ echo " --run (-r): Run a model previously converted into ggml"
36
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
37
+ echo " --bench (-b): Benchmark the performance of the inference for various parameters."
38
+ echo " ex: -m model.gguf"
39
+ echo " --perplexity (-p): Measure the perplexity of a model over a given text."
40
+ echo " ex: -m model.gguf -f file.txt"
41
+ echo " --convert (-c): Convert a llama model into ggml"
42
+ echo " ex: --outtype f16 \"/models/7B/\" "
43
+ echo " --quantize (-q): Optimize with quantization process ggml"
44
+ echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
45
+ echo " --all-in-one (-a): Execute --convert & --quantize"
46
+ echo " ex: \"/models/\" 7B"
47
+ echo " --server (-s): Run a model on the server"
48
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
49
+ fi
.devops/vulkan.Dockerfile ADDED
@@ -0,0 +1,89 @@
1
+ ARG UBUNTU_VERSION=24.04
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ # Install build tools
6
+ RUN apt update && apt install -y git build-essential cmake wget
7
+
8
+ # Install Vulkan SDK and cURL
9
+ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
10
+ wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
11
+ apt update -y && \
12
+ apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
13
+
14
+ # Build it
15
+ WORKDIR /app
16
+
17
+ COPY . .
18
+
19
+ RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
20
+ cmake --build build --config Release -j$(nproc)
21
+
22
+ RUN mkdir -p /app/lib && \
23
+ find build -name "*.so" -exec cp {} /app/lib \;
24
+
25
+ RUN mkdir -p /app/full \
26
+ && cp build/bin/* /app/full \
27
+ && cp *.py /app/full \
28
+ && cp -r gguf-py /app/full \
29
+ && cp -r requirements /app/full \
30
+ && cp requirements.txt /app/full \
31
+ && cp .devops/tools.sh /app/full/tools.sh
32
+
33
+ ## Base image
34
+ FROM ubuntu:$UBUNTU_VERSION AS base
35
+
36
+ RUN apt-get update \
37
+ && apt-get install -y libgomp1 curl libvulkan-dev \
38
+ && apt autoremove -y \
39
+ && apt clean -y \
40
+ && rm -rf /tmp/* /var/tmp/* \
41
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
42
+ && find /var/cache -type f -delete
43
+
44
+ COPY --from=build /app/lib/ /app
45
+
46
+ ### Full
47
+ FROM base AS full
48
+
49
+ COPY --from=build /app/full /app
50
+
51
+ WORKDIR /app
52
+
53
+ RUN apt-get update \
54
+ && apt-get install -y \
55
+ git \
56
+ python3 \
57
+ python3-pip \
58
+ python3-wheel \
59
+ && pip install --break-system-packages --upgrade setuptools \
60
+ && pip install --break-system-packages -r requirements.txt \
61
+ && apt autoremove -y \
62
+ && apt clean -y \
63
+ && rm -rf /tmp/* /var/tmp/* \
64
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
65
+ && find /var/cache -type f -delete
66
+
67
+ ENTRYPOINT ["/app/tools.sh"]
68
+
69
+ ### Light, CLI only
70
+ FROM base AS light
71
+
72
+ COPY --from=build /app/full/llama-cli /app
73
+
74
+ WORKDIR /app
75
+
76
+ ENTRYPOINT [ "/app/llama-cli" ]
77
+
78
+ ### Server, Server only
79
+ FROM base AS server
80
+
81
+ ENV LLAMA_ARG_HOST=0.0.0.0
82
+
83
+ COPY --from=build /app/full/llama-server /app
84
+
85
+ WORKDIR /app
86
+
87
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
88
+
89
+ ENTRYPOINT [ "/app/llama-server" ]
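The Dockerfile above is a multi-stage build: a build stage compiles with GGML_VULKAN enabled, and three publishable stages (full, light, server) layer onto a shared runtime base. A sketch of how each stage can be selected at build time (the image tags are illustrative, not part of this commit):

    docker build -f .devops/vulkan.Dockerfile --target full   -t llama-cpp-vulkan:full .
    docker build -f .devops/vulkan.Dockerfile --target light  -t llama-cpp-vulkan:light .
    docker build -f .devops/vulkan.Dockerfile --target server -t llama-cpp-vulkan:server .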
.dockerignore ADDED
@@ -0,0 +1,20 @@
1
+ *.o
2
+ *.a
3
+ .cache/
4
+ # Do not ignore .git directory, otherwise the reported build number will always be 0
5
+ .github/
6
+ .gitignore
7
+ .vs/
8
+ .vscode/
9
+ .DS_Store
10
+
11
+ build*/
12
+
13
+ models/*
14
+
15
+ /llama-cli
16
+ /llama-quantize
17
+
18
+ arm_neon.h
19
+ compile_commands.json
20
+ Dockerfile
.ecrc ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
3
+ "Disable": {
4
+ "IndentSize": true
5
+ }
6
+ }
.editorconfig ADDED
@@ -0,0 +1,50 @@
1
+ # https://EditorConfig.org
2
+
3
+ # Top-most EditorConfig file
4
+ root = true
5
+
6
+ # Unix-style newlines with a newline ending every file, utf-8 charset
7
+ [*]
8
+ end_of_line = lf
9
+ insert_final_newline = true
10
+ trim_trailing_whitespace = true
11
+ charset = utf-8
12
+ indent_style = space
13
+ indent_size = 4
14
+
15
+ [Makefile]
16
+ indent_style = tab
17
+
18
+ [scripts/*.mk]
19
+ indent_style = tab
20
+
21
+ [prompts/*.txt]
22
+ insert_final_newline = unset
23
+
24
+ [examples/server/public/*]
25
+ indent_size = 2
26
+
27
+ [examples/server/public/deps_*]
28
+ trim_trailing_whitespace = unset
29
+ indent_style = unset
30
+ indent_size = unset
31
+
32
+ [examples/server/deps_*]
33
+ trim_trailing_whitespace = unset
34
+ indent_style = unset
35
+ indent_size = unset
36
+
37
+ [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
38
+ indent_style = tab
39
+
40
+ [examples/cvector-generator/*.txt]
41
+ trim_trailing_whitespace = unset
42
+ insert_final_newline = unset
43
+
44
+ [models/templates/*.jinja]
45
+ indent_style = unset
46
+ indent_size = unset
47
+ end_of_line = unset
48
+ charset = unset
49
+ trim_trailing_whitespace = unset
50
+ insert_final_newline = unset
.flake8 ADDED
@@ -0,0 +1,17 @@
1
+ [flake8]
2
+ max-line-length = 125
3
+ ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
4
+ exclude =
5
+ # Do not traverse examples
6
+ examples,
7
+ # Do not include package initializers
8
+ __init__.py,
9
+ # No need to traverse our git directory
10
+ .git,
11
+ # There's no value in checking cache directories
12
+ __pycache__,
13
+ # No need to include the build path
14
+ build,
15
+ # This contains builds that we don't want to check
16
+ dist # This is generated with `python build .` for package releases
17
+ # max-complexity = 10
.gitattributes CHANGED
@@ -33,3 +33,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docs/development/llama-star/idea-arch.key filter=lfs diff=lfs merge=lfs -text
37
+ examples/server/themes/buttons-top/buttons_top.png filter=lfs diff=lfs merge=lfs -text
38
+ examples/server/themes/wild/llamapattern.png filter=lfs diff=lfs merge=lfs -text
39
+ examples/server/themes/wild/wild.png filter=lfs diff=lfs merge=lfs -text
40
+ media/llama0-banner.png filter=lfs diff=lfs merge=lfs -text
41
+ media/llama0-logo.png filter=lfs diff=lfs merge=lfs -text
42
+ media/matmul.png filter=lfs diff=lfs merge=lfs -text
43
+ models/ggml-vocab-aquila.gguf filter=lfs diff=lfs merge=lfs -text
44
+ models/ggml-vocab-baichuan.gguf filter=lfs diff=lfs merge=lfs -text
45
+ models/ggml-vocab-bert-bge.gguf filter=lfs diff=lfs merge=lfs -text
46
+ models/ggml-vocab-command-r.gguf filter=lfs diff=lfs merge=lfs -text
47
+ models/ggml-vocab-deepseek-coder.gguf filter=lfs diff=lfs merge=lfs -text
48
+ models/ggml-vocab-deepseek-llm.gguf filter=lfs diff=lfs merge=lfs -text
49
+ models/ggml-vocab-falcon.gguf filter=lfs diff=lfs merge=lfs -text
50
+ models/ggml-vocab-gpt-2.gguf filter=lfs diff=lfs merge=lfs -text
51
+ models/ggml-vocab-gpt-neox.gguf filter=lfs diff=lfs merge=lfs -text
52
+ models/ggml-vocab-llama-bpe.gguf filter=lfs diff=lfs merge=lfs -text
53
+ models/ggml-vocab-llama-spm.gguf filter=lfs diff=lfs merge=lfs -text
54
+ models/ggml-vocab-mpt.gguf filter=lfs diff=lfs merge=lfs -text
55
+ models/ggml-vocab-phi-3.gguf filter=lfs diff=lfs merge=lfs -text
56
+ models/ggml-vocab-qwen2.gguf filter=lfs diff=lfs merge=lfs -text
57
+ models/ggml-vocab-refact.gguf filter=lfs diff=lfs merge=lfs -text
58
+ models/ggml-vocab-starcoder.gguf filter=lfs diff=lfs merge=lfs -text
.github/ISSUE_TEMPLATE/010-bug-compilation.yml ADDED
@@ -0,0 +1,87 @@
1
+ name: Bug (compilation)
2
+ description: Something goes wrong when trying to compile llama.cpp.
3
+ title: "Compile bug: "
4
+ labels: ["bug-unconfirmed", "compilation"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for bug reports where the compilation of llama.cpp fails.
11
+ Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
12
+ If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
13
+ by clearing `~/.cache/ccache` (on Linux).
14
+ - type: textarea
15
+ id: commit
16
+ attributes:
17
+ label: Git commit
18
+ description: Which commit are you trying to compile?
19
+ placeholder: |
20
+ $git rev-parse HEAD
21
+ 84a07a17b1b08cf2b9747c633a2372782848a27f
22
+ validations:
23
+ required: true
24
+ - type: dropdown
25
+ id: operating-system
26
+ attributes:
27
+ label: Operating systems
28
+ description: Which operating systems do you know to be affected?
29
+ multiple: true
30
+ options:
31
+ - Linux
32
+ - Mac
33
+ - Windows
34
+ - BSD
35
+ - Other? (Please let us know in description)
36
+ validations:
37
+ required: true
38
+ - type: dropdown
39
+ id: backends
40
+ attributes:
41
+ label: GGML backends
42
+ description: Which GGML backends do you know to be affected?
43
+ options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
44
+ multiple: true
45
+ validations:
46
+ required: true
47
+ - type: textarea
48
+ id: info
49
+ attributes:
50
+ label: Problem description & steps to reproduce
51
+ description: >
52
+ Please give us a summary of the problem and tell us how to reproduce it.
53
+ If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
54
+ placeholder: >
55
+ I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
56
+ Here are the exact commands that I used: ...
57
+ validations:
58
+ required: true
59
+ - type: textarea
60
+ id: first_bad_commit
61
+ attributes:
62
+ label: First Bad Commit
63
+ description: >
64
+ If the bug was not present on an earlier version: when did it start appearing?
65
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
66
+ validations:
67
+ required: false
68
+ - type: textarea
69
+ id: command
70
+ attributes:
71
+ label: Compile command
72
+ description: >
73
+ Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
74
+ This will be automatically formatted into code, so no need for backticks.
75
+ render: shell
76
+ validations:
77
+ required: true
78
+ - type: textarea
79
+ id: logs
80
+ attributes:
81
+ label: Relevant log output
82
+ description: >
83
+ Please copy and paste any relevant log output, including any generated text.
84
+ This will be automatically formatted into code, so no need for backticks.
85
+ render: shell
86
+ validations:
87
+ required: true
.github/ISSUE_TEMPLATE/011-bug-results.yml ADDED
@@ -0,0 +1,101 @@
1
+ name: Bug (model use)
2
+ description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
3
+ title: "Eval bug: "
4
+ labels: ["bug-unconfirmed", "model evaluation"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for bug reports where the model evaluation results
11
+ (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
12
+ If you encountered the issue while using an external UI (e.g. ollama),
13
+ please reproduce your issue using one of the examples/binaries in this repository.
14
+ The `llama-cli` binary can be used for simple and reproducible model inference.
15
+ - type: textarea
16
+ id: version
17
+ attributes:
18
+ label: Name and Version
19
+ description: Which version of our software are you running? (use `--version` to get a version string)
20
+ placeholder: |
21
+ $./llama-cli --version
22
+ version: 2999 (42b4109e)
23
+ built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
24
+ validations:
25
+ required: true
26
+ - type: dropdown
27
+ id: operating-system
28
+ attributes:
29
+ label: Operating systems
30
+ description: Which operating systems do you know to be affected?
31
+ multiple: true
32
+ options:
33
+ - Linux
34
+ - Mac
35
+ - Windows
36
+ - BSD
37
+ - Other? (Please let us know in description)
38
+ validations:
39
+ required: true
40
+ - type: dropdown
41
+ id: backends
42
+ attributes:
43
+ label: GGML backends
44
+ description: Which GGML backends do you know to be affected?
45
+ options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
46
+ multiple: true
47
+ validations:
48
+ required: true
49
+ - type: textarea
50
+ id: hardware
51
+ attributes:
52
+ label: Hardware
53
+ description: Which CPUs/GPUs are you using?
54
+ placeholder: >
55
+ e.g. Ryzen 5950X + 2x RTX 4090
56
+ validations:
57
+ required: true
58
+ - type: textarea
59
+ id: model
60
+ attributes:
61
+ label: Models
62
+ description: >
63
+ Which model(s) at which quantization were you using when encountering the bug?
64
+ If you downloaded a GGUF file off of Huggingface, please provide a link.
65
+ placeholder: >
66
+ e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
67
+ validations:
68
+ required: false
69
+ - type: textarea
70
+ id: info
71
+ attributes:
72
+ label: Problem description & steps to reproduce
73
+ description: >
74
+ Please give us a summary of the problem and tell us how to reproduce it.
75
+ If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
76
+ that information would be very much appreciated by us.
77
+ placeholder: >
78
+ e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
79
+ When I use -ngl 0 it works correctly.
80
+ Here are the exact commands that I used: ...
81
+ validations:
82
+ required: true
83
+ - type: textarea
84
+ id: first_bad_commit
85
+ attributes:
86
+ label: First Bad Commit
87
+ description: >
88
+ If the bug was not present on an earlier version: when did it start appearing?
89
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
90
+ validations:
91
+ required: false
92
+ - type: textarea
93
+ id: logs
94
+ attributes:
95
+ label: Relevant log output
96
+ description: >
97
+ Please copy and paste any relevant log output, including the command that you entered and any generated text.
98
+ This will be automatically formatted into code, so no need for backticks.
99
+ render: shell
100
+ validations:
101
+ required: true
.github/ISSUE_TEMPLATE/019-bug-misc.yml ADDED
@@ -0,0 +1,91 @@
1
+ name: Bug (misc.)
2
+ description: Something is not working the way it should (and it's not covered by any of the above cases).
3
+ title: "Misc. bug: "
4
+ labels: ["bug-unconfirmed"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for miscellaneous bugs that don't fit into any other category.
11
+ If you encountered the issue while using an external UI (e.g. ollama),
12
+ please reproduce your issue using one of the examples/binaries in this repository.
13
+ - type: textarea
14
+ id: version
15
+ attributes:
16
+ label: Name and Version
17
+ description: Which version of our software is affected? (You can use `--version` to get a version string.)
18
+ placeholder: |
19
+ $./llama-cli --version
20
+ version: 2999 (42b4109e)
21
+ built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
22
+ validations:
23
+ required: true
24
+ - type: dropdown
25
+ id: operating-system
26
+ attributes:
27
+ label: Operating systems
28
+ description: Which operating systems do you know to be affected?
29
+ multiple: true
30
+ options:
31
+ - Linux
32
+ - Mac
33
+ - Windows
34
+ - BSD
35
+ - Other? (Please let us know in description)
36
+ validations:
37
+ required: false
38
+ - type: dropdown
39
+ id: module
40
+ attributes:
41
+ label: Which llama.cpp modules do you know to be affected?
42
+ multiple: true
43
+ options:
44
+ - Documentation/Github
45
+ - libllama (core library)
46
+ - llama-cli
47
+ - llama-server
48
+ - llama-bench
49
+ - llama-quantize
50
+ - Python/Bash scripts
51
+ - Test code
52
+ - Other (Please specify in the next section)
53
+ validations:
54
+ required: false
55
+ - type: textarea
56
+ id: command
57
+ attributes:
58
+ label: Command line
59
+ description: >
60
+ Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
61
+ This will be automatically formatted into code, so no need for backticks.
62
+ render: shell
63
+ validations:
64
+ required: false
65
+ - type: textarea
66
+ id: info
67
+ attributes:
68
+ label: Problem description & steps to reproduce
69
+ description: >
70
+ Please give us a summary of the problem and tell us how to reproduce it (if applicable).
71
+ validations:
72
+ required: true
73
+ - type: textarea
74
+ id: first_bad_commit
75
+ attributes:
76
+ label: First Bad Commit
77
+ description: >
78
+ If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
79
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
80
+ validations:
81
+ required: false
82
+ - type: textarea
83
+ id: logs
84
+ attributes:
85
+ label: Relevant log output
86
+ description: >
87
+ If applicable, please copy and paste any relevant log output, including any generated text.
88
+ This will be automatically formatted into code, so no need for backticks.
89
+ render: shell
90
+ validations:
91
+ required: false
.github/ISSUE_TEMPLATE/020-enhancement.yml ADDED
@@ -0,0 +1,51 @@
1
+ name: Enhancement
2
+ description: Used to request enhancements for llama.cpp.
3
+ title: "Feature Request: "
4
+ labels: ["enhancement"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas)
10
+
11
+ - type: checkboxes
12
+ id: prerequisites
13
+ attributes:
14
+ label: Prerequisites
15
+ description: Please confirm the following before submitting your enhancement request.
16
+ options:
17
+ - label: I am running the latest code. Mention the version if possible as well.
18
+ required: true
19
+ - label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md).
20
+ required: true
21
+ - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
22
+ required: true
23
+ - label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share.
24
+ required: true
25
+
26
+ - type: textarea
27
+ id: feature-description
28
+ attributes:
29
+ label: Feature Description
30
+ description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
31
+ placeholder: Detailed description of the enhancement
32
+ validations:
33
+ required: true
34
+
35
+ - type: textarea
36
+ id: motivation
37
+ attributes:
38
+ label: Motivation
39
+ description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
40
+ placeholder: Explanation of why this feature is needed and its benefits
41
+ validations:
42
+ required: true
43
+
44
+ - type: textarea
45
+ id: possible-implementation
46
+ attributes:
47
+ label: Possible Implementation
48
+ description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
49
+ placeholder: Detailed description of potential implementation
50
+ validations:
51
+ required: false
.github/ISSUE_TEMPLATE/030-research.yml ADDED
@@ -0,0 +1,52 @@
1
+ name: Research
2
+ description: Track new technical research area.
3
+ title: "Research: "
4
+ labels: ["research 🔬"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
10
+
11
+ - type: checkboxes
12
+ id: research-stage
13
+ attributes:
14
+ label: Research Stage
15
+ description: Track general state of this research ticket
16
+ options:
17
+ - label: Background Research (Let's try to avoid reinventing the wheel)
18
+ - label: Hypothesis Formed (How do you think this will work and what its effect will be?)
19
+ - label: Strategy / Implementation Forming
20
+ - label: Analysis of results
21
+ - label: Debrief / Documentation (So people in the future can learn from us)
22
+
23
+ - type: textarea
24
+ id: background
25
+ attributes:
26
+ label: Previous existing literature and research
27
+ description: What's the current state of the art, and what's the motivation for this research?
28
+
29
+ - type: textarea
30
+ id: hypothesis
31
+ attributes:
32
+ label: Hypothesis
33
+ description: How do you think this will work, and what will its effect be?
34
+
35
+ - type: textarea
36
+ id: implementation
37
+ attributes:
38
+ label: Implementation
39
+ description: Got an approach? e.g. a PR ready to go?
40
+
41
+ - type: textarea
42
+ id: analysis
43
+ attributes:
44
+ label: Analysis
45
+ description: How does the proposed implementation behave?
46
+
47
+ - type: textarea
48
+ id: logs
49
+ attributes:
50
+ label: Relevant log output
51
+ description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
52
+ render: shell
.github/ISSUE_TEMPLATE/040-refactor.yml ADDED
@@ -0,0 +1,28 @@
1
+ name: Refactor (Maintainers)
2
+ description: Used to track refactoring opportunities.
3
+ title: "Refactor: "
4
+ labels: ["refactor"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
10
+ Also you may want to check [Pull request refactor label as well](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too.
11
+
12
+ - type: textarea
13
+ id: background-description
14
+ attributes:
15
+ label: Background Description
16
+ description: Please provide a detailed written description of the pain points you are trying to solve.
17
+ placeholder: Detailed description behind your motivation to request refactor
18
+ validations:
19
+ required: true
20
+
21
+ - type: textarea
22
+ id: possible-approaches
23
+ attributes:
24
+ label: Possible Refactor Approaches
25
+ description: If you have an idea of possible approaches to solve this problem, describe it here. You may want to make it a todo list.
26
+ placeholder: Your idea of possible refactoring opportunity/approaches
27
+ validations:
28
+ required: false
.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,11 @@
1
+ blank_issues_enabled: true
2
+ contact_links:
3
+ - name: Got an idea?
4
+ url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas
5
+ about: Pop it there. It may then become an enhancement ticket.
6
+ - name: Got a question?
7
+ url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a
8
+ about: Ask a question there!
9
+ - name: Want to contribute?
10
+ url: https://github.com/ggml-org/llama.cpp/wiki/contribute
11
+ about: Head to the contribution guide page of the wiki for areas you can help with
.github/labeler.yml ADDED
@@ -0,0 +1,86 @@
1
+ # https://github.com/actions/labeler
2
+ Kompute:
3
+ - changed-files:
4
+ - any-glob-to-any-file:
5
+ - ggml/include/ggml-kompute.h
6
+ - ggml/src/ggml-kompute/**
7
+ - README-kompute.md
8
+ Apple Metal:
9
+ - changed-files:
10
+ - any-glob-to-any-file:
11
+ - ggml/include/ggml-metal.h
12
+ - ggml/src/ggml-metal/**
13
+ - README-metal.md
14
+ SYCL:
15
+ - changed-files:
16
+ - any-glob-to-any-file:
17
+ - ggml/include/ggml-sycl.h
18
+ - ggml/src/ggml-sycl/**
19
+ - docs/backend/SYCL.md
20
+ - examples/sycl/**
21
+ Nvidia GPU:
22
+ - changed-files:
23
+ - any-glob-to-any-file:
24
+ - ggml/include/ggml-cuda.h
25
+ - ggml/src/ggml-cuda/**
26
+ Vulkan:
27
+ - changed-files:
28
+ - any-glob-to-any-file:
29
+ - ggml/include/ggml-vulkan.h
30
+ - ggml/src/ggml-vulkan/**
31
+ documentation:
32
+ - changed-files:
33
+ - any-glob-to-any-file:
34
+ - docs/**
35
+ - media/**
36
+ testing:
37
+ - changed-files:
38
+ - any-glob-to-any-file:
39
+ - tests/**
40
+ build:
41
+ - changed-files:
42
+ - any-glob-to-any-file:
43
+ - cmake/**
44
+ - CMakeLists.txt
45
+ - CMakePresets.json
46
+ examples:
47
+ - changed-files:
48
+ - any-glob-to-any-file: examples/**
49
+ devops:
50
+ - changed-files:
51
+ - any-glob-to-any-file:
52
+ - .devops/**
53
+ - .github/**
54
+ - ci/**
55
+ python:
56
+ - changed-files:
57
+ - any-glob-to-any-file:
58
+ - "**/*.py"
59
+ - requirements/**
60
+ - gguf-py/**
61
+ - .flake8
62
+ script:
63
+ - changed-files:
64
+ - any-glob-to-any-file:
65
+ - scripts/**
66
+ android:
67
+ - changed-files:
68
+ - any-glob-to-any-file:
69
+ - examples/llama.android/**
70
+ server:
71
+ - changed-files:
72
+ - any-glob-to-any-file:
73
+ - examples/server/**
74
+ ggml:
75
+ - changed-files:
76
+ - any-glob-to-any-file:
77
+ - ggml/**
78
+ nix:
79
+ - changed-files:
80
+ - any-glob-to-any-file:
81
+ - "**/*.nix"
82
+ - .github/workflows/nix-*.yml
83
+ - .devops/nix/nixpkgs-instances.nix
84
+ embedding:
85
+ - changed-files:
86
+ - any-glob-to-any-file: examples/embedding/
.github/pull_request_template.md ADDED
@@ -0,0 +1 @@
1
+ *Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
.github/workflows/bench.yml.disabled ADDED
@@ -0,0 +1,305 @@
1
+ # TODO: there have been some issues with the workflow, so disabling for now
2
+ # https://github.com/ggml-org/llama.cpp/issues/7893
3
+ #
4
+ # Benchmark
5
+ name: Benchmark
6
+
7
+ on:
8
+ workflow_dispatch:
9
+ inputs:
10
+ gpu-series:
11
+ description: 'Azure GPU series to run with'
12
+ required: true
13
+ type: choice
14
+ options:
15
+ - Standard_NC4as_T4_v3
16
+ - Standard_NC24ads_A100_v4
17
+ - Standard_NC80adis_H100_v5
18
+ sha:
19
+ description: 'Commit SHA1 to build'
20
+ required: false
21
+ type: string
22
+ duration:
23
+ description: 'Duration of the bench'
24
+ type: string
25
+ default: 10m
26
+
27
+ push:
28
+ branches:
29
+ - master
30
+ paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
31
+ pull_request_target:
32
+ types: [opened, synchronize, reopened]
33
+ paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
34
+ schedule:
35
+ - cron: '04 2 * * *'
36
+
37
+ concurrency:
38
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
39
+ cancel-in-progress: true
40
+
41
+ jobs:
42
+ bench-server-baseline:
43
+ runs-on: Standard_NC4as_T4_v3
44
+ env:
45
+ RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
46
+ N_USERS: 8
47
+ DURATION: 10m
48
+
49
+ strategy:
50
+ matrix:
51
+ model: [phi-2]
52
+ ftype: [q4_0, q8_0, f16]
53
+ include:
54
+ - model: phi-2
55
+ ftype: q4_0
56
+ pr_comment_enabled: "true"
57
+
58
+ if: |
59
+ inputs.gpu-series == 'Standard_NC4as_T4_v3'
60
+ || github.event_name == 'pull_request_target'
61
+ steps:
62
+ - name: Clone
63
+ id: checkout
64
+ uses: actions/checkout@v4
65
+ with:
66
+ fetch-depth: 0
67
+ ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
68
+
69
+ - name: Install python env
70
+ id: pipenv
71
+ run: |
72
+ cd examples/server/bench
73
+ python3 -m venv venv
74
+ source venv/bin/activate
75
+ pip install -r requirements.txt
76
+
77
+ - name: Prometheus
78
+ id: install_prometheus
79
+ run: |
80
+ wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
81
+ tar xzf prometheus*.tar.gz --strip-components=1
82
+ ./prometheus --config.file=examples/server/bench/prometheus.yml &
83
+ while ! nc -z localhost 9090; do
84
+ sleep 0.1
85
+ done
86
+
87
+ - name: Set up Go
88
+ uses: actions/setup-go@v5
89
+ with:
90
+ go-version: '1.21'
91
+
92
+ - name: Install k6 and xk6-sse
93
+ id: k6_installation
94
+ run: |
95
+ cd examples/server/bench
96
+ go install go.k6.io/xk6/cmd/xk6@latest
97
+ xk6 build master \
98
+ --with github.com/phymbert/xk6-sse
99
+
100
+ - name: Build
101
+ id: cmake_build
102
+ run: |
103
+ set -eux
104
+ cmake -B build \
105
+ -DGGML_NATIVE=OFF \
106
+ -DLLAMA_BUILD_SERVER=ON \
107
+ -DLLAMA_CURL=ON \
108
+ -DLLAMA_CUBLAS=ON \
109
+ -DCUDAToolkit_ROOT=/usr/local/cuda \
110
+ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
111
+ -DCMAKE_CUDA_ARCHITECTURES=75 \
112
+ -DLLAMA_FATAL_WARNINGS=OFF \
113
+ -DLLAMA_ALL_WARNINGS=OFF \
114
+ -DCMAKE_BUILD_TYPE=Release;
115
+ cmake --build build --config Release -j $(nproc) --target llama-server
116
+
117
+ - name: Download the dataset
118
+ id: download_dataset
119
+ run: |
120
+ cd examples/server/bench
121
+ wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
122
+
123
+ - name: Server bench
124
+ id: server_bench
125
+ env:
126
+ HEAD_REF: ${{ github.head_ref || github.ref_name }}
127
+ run: |
128
+ set -eux
129
+
130
+ cd examples/server/bench
131
+ source venv/bin/activate
132
+ python bench.py \
133
+ --runner-label ${{ env.RUNNER_LABEL }} \
134
+ --name ${{ github.job }} \
135
+ --branch $HEAD_REF \
136
+ --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
137
+ --scenario script.js \
138
+ --duration ${{ github.event.inputs.duration || env.DURATION }} \
139
+ --hf-repo ggml-org/models \
140
+ --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
141
+ --model-path-prefix /models \
142
+ --parallel ${{ env.N_USERS }} \
143
+ -ngl 33 \
144
+ --batch-size 2048 \
145
+ --ubatch-size 256 \
146
+ --ctx-size 16384 \
147
+ --n-prompts 1000 \
148
+ --max-prompt-tokens 1024 \
149
+ --max-tokens 2048
150
+
151
+ cat results.github.env >> $GITHUB_ENV
152
+
153
+ # Remove dataset as we do not want it in the artefact
154
+ rm ShareGPT_V3_unfiltered_cleaned_split.json
155
+
156
+ - uses: actions/upload-artifact@v4
157
+ with:
158
+ name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
159
+ compression-level: 9
160
+ path: |
161
+ examples/server/bench/*.jpg
162
+ examples/server/bench/*.json
163
+ examples/server/bench/*.log
164
+
165
+ - name: Commit status
166
+ uses: Sibz/github-status-action@v1
167
+ with:
168
+ authToken: ${{secrets.GITHUB_TOKEN}}
169
+ sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
170
+ context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
171
+ description: |
172
+ ${{ env.BENCH_RESULTS }}
173
+ state: 'success'
174
+
175
+ - name: Upload benchmark images
176
+ uses: devicons/[email protected]
177
+ continue-on-error: true # Important as it looks unstable: 503
178
+ id: imgur_step
179
+ with:
180
+ client_id: ${{secrets.IMGUR_CLIENT_ID}}
181
+ path: |
182
+ examples/server/bench/prompt_tokens_seconds.jpg
183
+ examples/server/bench/predicted_tokens_seconds.jpg
184
+ examples/server/bench/kv_cache_usage_ratio.jpg
185
+ examples/server/bench/requests_processing.jpg
186
+
187
+ - name: Extract mermaid
188
+ id: set_mermaid
189
+ run: |
190
+ set -eux
191
+
192
+ cd examples/server/bench
193
+ PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
194
+ echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
195
+ echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
196
+ echo "EOF" >> $GITHUB_ENV
197
+
198
+ PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
199
+ echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
200
+ echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
201
+ echo "EOF" >> $GITHUB_ENV
202
+
203
+ KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
204
+ echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
205
+ echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
206
+ echo "EOF" >> $GITHUB_ENV
207
+
208
+ REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
209
+ echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
210
+ echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
211
+ echo "EOF" >> $GITHUB_ENV
212
+
213
+ - name: Extract image url
214
+ id: extract_image_url
215
+ continue-on-error: true
216
+ run: |
217
+ set -eux
218
+
219
+ echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
220
+ echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
221
+ echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
222
+ echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
223
+
224
+ - name: Comment PR
225
+ uses: mshick/add-pr-comment@v2
226
+ id: comment_pr
227
+ if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
228
+ with:
229
+ message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
230
+ message: |
231
+ <p align="center">
232
+
233
+ 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
234
+
235
+ </p>
236
+
237
+ <details>
238
+
239
+ <summary>Expand details for performance related PR only</summary>
240
+
241
+ - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
242
+ - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
243
+ - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
244
+ - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
245
+ - ${{ env.BENCH_GRAPH_XLABEL }}
246
+
247
+
248
+ <p align="center">
249
+
250
+ <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
251
+
252
+ <details>
253
+
254
+ <summary>More</summary>
255
+
256
+ ```mermaid
257
+ ${{ env.PROMPT_TOKENS_SECONDS }}
258
+ ```
259
+
260
+ </details>
261
+
262
+ <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
263
+
264
+ <details>
265
+ <summary>More</summary>
266
+
267
+ ```mermaid
268
+ ${{ env.PREDICTED_TOKENS_SECONDS }}
269
+ ```
270
+
271
+ </details>
272
+
273
+ </p>
274
+
275
+ <details>
276
+
277
+ <summary>Details</summary>
278
+
279
+ <p align="center">
280
+
281
+ <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
282
+
283
+ <details>
284
+ <summary>More</summary>
285
+
286
+ ```mermaid
287
+ ${{ env.KV_CACHE_USAGE_RATIO }}
288
+ ```
289
+
290
+ </details>
291
+
292
+ <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
293
+
294
+ <details>
295
+ <summary>More</summary>
296
+
297
+ ```mermaid
298
+ ${{ env.REQUESTS_PROCESSING }}
299
+ ```
300
+
301
+ </details>
302
+
303
+ </p>
304
+ </details>
305
+ </details>
.github/workflows/build.yml ADDED
@@ -0,0 +1,1756 @@
1
+ name: CI
2
+
3
+ on:
4
+ workflow_dispatch: # allows manual triggering
5
+ inputs:
6
+ create_release:
7
+ description: 'Create new release'
8
+ required: true
9
+ type: boolean
10
+ push:
11
+ branches:
12
+ - master
13
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
14
+ pull_request:
15
+ types: [opened, synchronize, reopened]
16
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
17
+
18
+ concurrency:
19
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
20
+ cancel-in-progress: true
21
+
22
+ # Fine-grant permission
23
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
24
+ permissions:
25
+ contents: write # for creating release
26
+
27
+ env:
28
+ BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
29
+ GGML_NLOOP: 3
30
+ GGML_N_THREADS: 1
31
+ LLAMA_LOG_COLORS: 1
32
+ LLAMA_LOG_PREFIX: 1
33
+ LLAMA_LOG_TIMESTAMPS: 1
34
+
35
+ jobs:
36
+ macOS-latest-cmake-arm64:
37
+ runs-on: macos-14
38
+
39
+ steps:
40
+ - name: Clone
41
+ id: checkout
42
+ uses: actions/checkout@v4
43
+ with:
44
+ fetch-depth: 0
45
+
46
+ - name: ccache
47
+ uses: hendrikmuhs/[email protected]
48
+ with:
49
+ key: macOS-latest-cmake-arm64
50
+ evict-old-files: 1d
51
+
52
+ - name: Dependencies
53
+ id: depends
54
+ continue-on-error: true
55
+ run: |
56
+ brew update
57
+
58
+ - name: Build
59
+ id: cmake_build
60
+ run: |
61
+ sysctl -a
62
+ cmake -B build \
63
+ -DCMAKE_BUILD_RPATH="@loader_path" \
64
+ -DLLAMA_FATAL_WARNINGS=ON \
65
+ -DLLAMA_CURL=ON \
66
+ -DGGML_METAL_USE_BF16=ON \
67
+ -DGGML_METAL_EMBED_LIBRARY=ON \
68
+ -DGGML_RPC=ON
69
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
70
+
71
+ - name: Test
72
+ id: cmake_test
73
+ run: |
74
+ cd build
75
+ ctest -L 'main|curl' --verbose --timeout 900
76
+
77
+ - name: Determine tag name
78
+ id: tag
79
+ shell: bash
80
+ run: |
81
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
82
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
83
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
84
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
85
+ else
86
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
87
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
88
+ fi
89
+
90
+ - name: Pack artifacts
91
+ id: pack_artifacts
92
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
93
+ run: |
94
+ cp LICENSE ./build/bin/
95
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
96
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
97
+
98
+ - name: Upload artifacts
99
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
100
+ uses: actions/upload-artifact@v4
101
+ with:
102
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
103
+ name: llama-bin-macos-arm64.zip
104
+
105
+ macOS-latest-cmake-x64:
106
+ runs-on: macos-13
107
+
108
+ steps:
109
+ - name: Clone
110
+ id: checkout
111
+ uses: actions/checkout@v4
112
+ with:
113
+ fetch-depth: 0
114
+
115
+ - name: ccache
116
+ uses: hendrikmuhs/[email protected]
117
+ with:
118
+ key: macOS-latest-cmake-x64
119
+ evict-old-files: 1d
120
+
121
+ - name: Dependencies
122
+ id: depends
123
+ continue-on-error: true
124
+ run: |
125
+ brew update
126
+
127
+ - name: Build
128
+ id: cmake_build
129
+ run: |
130
+ sysctl -a
131
+ # Metal is disabled due to intermittent failures with Github runners not having a GPU:
132
+ # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
133
+ cmake -B build \
134
+ -DCMAKE_BUILD_RPATH="@loader_path" \
135
+ -DLLAMA_FATAL_WARNINGS=ON \
136
+ -DLLAMA_CURL=ON \
137
+ -DGGML_METAL=OFF \
138
+ -DGGML_RPC=ON
139
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
140
+
141
+ - name: Test
142
+ id: cmake_test
143
+ run: |
144
+ cd build
145
+ ctest -L main --verbose --timeout 900
146
+
147
+ - name: Determine tag name
148
+ id: tag
149
+ shell: bash
150
+ run: |
151
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
152
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
153
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
154
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
155
+ else
156
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
157
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
158
+ fi
159
+
160
+ - name: Pack artifacts
161
+ id: pack_artifacts
162
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
163
+ run: |
164
+ cp LICENSE ./build/bin/
165
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
166
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
167
+
168
+ - name: Upload artifacts
169
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
170
+ uses: actions/upload-artifact@v4
171
+ with:
172
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
173
+ name: llama-bin-macos-x64.zip
174
+
175
+ ubuntu-cpu-cmake:
176
+ strategy:
177
+ matrix:
178
+ include:
179
+ - build: 'x64'
180
+ os: ubuntu-22.04
181
+ - build: 'arm64'
182
+ os: ubuntu-22.04-arm
183
+
184
+ runs-on: ${{ matrix.os }}
185
+
186
+ steps:
187
+ - name: Clone
188
+ id: checkout
189
+ uses: actions/checkout@v4
190
+ with:
191
+ fetch-depth: 0
192
+
193
+ - name: ccache
194
+ uses: hendrikmuhs/[email protected]
195
+ with:
196
+ key: ubuntu-cpu-cmake
197
+ evict-old-files: 1d
198
+
199
+ - name: Dependencies
200
+ id: depends
201
+ run: |
202
+ sudo apt-get update
203
+ sudo apt-get install build-essential libcurl4-openssl-dev
204
+
205
+ - name: Build
206
+ id: cmake_build
207
+ run: |
208
+ cmake -B build \
209
+ -DLLAMA_FATAL_WARNINGS=ON \
210
+ -DLLAMA_CURL=ON \
211
+ -DGGML_RPC=ON
212
+ cmake --build build --config Release -j $(nproc)
213
+
214
+ - name: Test
215
+ id: cmake_test
216
+ run: |
217
+ cd build
218
+ ctest -L 'main|curl' --verbose --timeout 900
219
+
220
+ - name: Test llama2c conversion
221
+ id: llama2c_test
222
+ run: |
223
+ cd build
224
+ echo "Fetch tokenizer"
225
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
226
+ echo "Fetch llama2c model"
227
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
228
+ ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
229
+ ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
230
+
231
+ - name: Determine tag name
232
+ id: tag
233
+ shell: bash
234
+ run: |
235
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
236
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
237
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
238
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
239
+ else
240
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
241
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
242
+ fi
243
+
244
+ - name: Pack artifacts
245
+ id: pack_artifacts
246
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
247
+ run: |
248
+ cp LICENSE ./build/bin/
249
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
250
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
251
+
252
+ - name: Upload artifacts
253
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
254
+ uses: actions/upload-artifact@v4
255
+ with:
256
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
257
+ name: llama-bin-ubuntu-${{ matrix.build }}.zip
258
+
259
+ ubuntu-latest-cmake-sanitizer:
260
+ runs-on: ubuntu-latest
261
+
262
+ continue-on-error: true
263
+
264
+ strategy:
265
+ matrix:
266
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
267
+ build_type: [Debug]
268
+
269
+ steps:
270
+ - name: Clone
271
+ id: checkout
272
+ uses: actions/checkout@v4
273
+
274
+ - name: ccache
275
+ uses: hendrikmuhs/[email protected]
276
+ with:
277
+ key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
278
+ evict-old-files: 1d
279
+
280
+ - name: Dependencies
281
+ id: depends
282
+ run: |
283
+ sudo apt-get update
284
+ sudo apt-get install build-essential
285
+
286
+ - name: Build
287
+ id: cmake_build
288
+ if: ${{ matrix.sanitizer != 'THREAD' }}
289
+ run: |
290
+ cmake -B build \
291
+ -DLLAMA_FATAL_WARNINGS=ON \
292
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
293
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
294
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
295
+
296
+ - name: Build (no OpenMP)
297
+ id: cmake_build_no_openmp
298
+ if: ${{ matrix.sanitizer == 'THREAD' }}
299
+ run: |
300
+ cmake -B build \
301
+ -DLLAMA_FATAL_WARNINGS=ON \
302
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
303
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
304
+ -DGGML_OPENMP=OFF
305
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
306
+
307
+ - name: Test
308
+ id: cmake_test
309
+ run: |
310
+ cd build
311
+ ctest -L main --verbose --timeout 900
312
+
313
+ ubuntu-latest-llguidance:
314
+ runs-on: ubuntu-latest
315
+
316
+ steps:
317
+ - name: Clone
318
+ id: checkout
319
+ uses: actions/checkout@v4
320
+
321
+ - name: Dependencies
322
+ id: depends
323
+ run: |
324
+ sudo apt-get update
325
+ sudo apt-get install build-essential
326
+
327
+ - name: Build
328
+ id: cmake_build
329
+ run: |
330
+ mkdir build
331
+ cd build
332
+ cmake .. \
333
+ -DLLAMA_FATAL_WARNINGS=ON \
334
+ -DLLAMA_LLGUIDANCE=ON
335
+ cmake --build . --config Release -j $(nproc)
336
+
337
+ - name: Test
338
+ id: cmake_test
339
+ run: |
340
+ cd build
341
+ ctest -L main --verbose --timeout 900
342
+
343
+ ubuntu-latest-cmake-rpc:
344
+ runs-on: ubuntu-latest
345
+
346
+ continue-on-error: true
347
+
348
+ steps:
349
+ - name: Clone
350
+ id: checkout
351
+ uses: actions/checkout@v4
352
+
353
+ - name: ccache
354
+ uses: hendrikmuhs/[email protected]
355
+ with:
356
+ key: ubuntu-latest-cmake-rpc
357
+ evict-old-files: 1d
358
+
359
+ - name: Dependencies
360
+ id: depends
361
+ run: |
362
+ sudo apt-get update
363
+ sudo apt-get install build-essential
364
+
365
+ - name: Build
366
+ id: cmake_build
367
+ run: |
368
+ cmake -B build \
369
+ -DGGML_RPC=ON
370
+ cmake --build build --config Release -j $(nproc)
371
+
372
+ - name: Test
373
+ id: cmake_test
374
+ run: |
375
+ cd build
376
+ ctest -L main --verbose
377
+
378
+ ubuntu-22-cmake-vulkan:
379
+ runs-on: ubuntu-22.04
380
+
381
+ steps:
382
+ - name: Clone
383
+ id: checkout
384
+ uses: actions/checkout@v4
385
+ with:
386
+ fetch-depth: 0
387
+
388
+ - name: ccache
389
+ uses: hendrikmuhs/[email protected]
390
+ with:
391
+ key: ubuntu-22-cmake-vulkan
392
+ evict-old-files: 1d
393
+
394
+ - name: Dependencies
395
+ id: depends
396
+ run: |
397
+ wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
398
+ sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
399
+ sudo apt-get update -y
400
+ sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
401
+
402
+ - name: Build
403
+ id: cmake_build
404
+ run: |
405
+ cmake -B build \
406
+ -DGGML_VULKAN=ON
407
+ cmake --build build --config Release -j $(nproc)
408
+
409
+ - name: Test
410
+ id: cmake_test
411
+ run: |
412
+ cd build
413
+ # This is using llvmpipe and runs slower than other backends
414
+ ctest -L main --verbose --timeout 2700
415
+
416
+ - name: Determine tag name
417
+ id: tag
418
+ shell: bash
419
+ run: |
420
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
421
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
422
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
423
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
424
+ else
425
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
426
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
427
+ fi
428
+
429
+ - name: Pack artifacts
430
+ id: pack_artifacts
431
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
432
+ run: |
433
+ cp LICENSE ./build/bin/
434
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
435
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
436
+
437
+ - name: Upload artifacts
438
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
439
+ uses: actions/upload-artifact@v4
440
+ with:
441
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
442
+ name: llama-bin-ubuntu-vulkan-x64.zip
443
+
444
+ ubuntu-22-cmake-hip:
445
+ runs-on: ubuntu-22.04
446
+ container: rocm/dev-ubuntu-22.04:6.0.2
447
+
448
+ steps:
449
+ - name: Clone
450
+ id: checkout
451
+ uses: actions/checkout@v4
452
+
453
+ - name: Dependencies
454
+ id: depends
455
+ run: |
456
+ sudo apt-get update
457
+ sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
458
+
459
+ - name: ccache
460
+ uses: hendrikmuhs/[email protected]
461
+ with:
462
+ key: ubuntu-22-cmake-hip
463
+ evict-old-files: 1d
464
+
465
+ - name: Build with native CMake HIP support
466
+ id: cmake_build
467
+ run: |
468
+ cmake -B build -S . \
469
+ -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
470
+ -DGGML_HIP_ROCWMMA_FATTN=ON \
471
+ -DGGML_HIP=ON
472
+ cmake --build build --config Release -j $(nproc)
473
+
474
+ - name: Build with legacy HIP support
475
+ id: cmake_build_legacy_hip
476
+ run: |
477
+ cmake -B build2 -S . \
478
+ -DCMAKE_C_COMPILER=hipcc \
479
+ -DCMAKE_CXX_COMPILER=hipcc \
480
+ -DGGML_HIP_ROCWMMA_FATTN=ON \
481
+ -DGGML_HIP=ON
482
+ cmake --build build2 --config Release -j $(nproc)
483
+
484
+ ubuntu-22-cmake-musa:
485
+ runs-on: ubuntu-22.04
486
+ container: mthreads/musa:rc3.1.1-devel-ubuntu22.04
487
+
488
+ steps:
489
+ - name: Clone
490
+ id: checkout
491
+ uses: actions/checkout@v4
492
+
493
+ - name: Dependencies
494
+ id: depends
495
+ run: |
496
+ apt-get update
497
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
498
+
499
+ - name: ccache
500
+ uses: hendrikmuhs/[email protected]
501
+ with:
502
+ key: ubuntu-22-cmake-musa
503
+ evict-old-files: 1d
504
+
505
+ - name: Build with native CMake MUSA support
506
+ id: cmake_build
507
+ run: |
508
+ cmake -B build -S . \
509
+ -DGGML_MUSA=ON
510
+ cmake --build build --config Release -j $(nproc)
511
+
512
+ ubuntu-22-cmake-sycl:
513
+ runs-on: ubuntu-22.04
514
+
515
+ continue-on-error: true
516
+
517
+ steps:
518
+ - uses: actions/checkout@v4
519
+
520
+ - name: add oneAPI to apt
521
+ shell: bash
522
+ run: |
523
+ cd /tmp
524
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
525
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
526
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
527
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
528
+
529
+ - name: install oneAPI dpcpp compiler
530
+ shell: bash
531
+ run: |
532
+ sudo apt update
533
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
534
+
535
+ - name: install oneAPI MKL library
536
+ shell: bash
537
+ run: |
538
+ sudo apt install intel-oneapi-mkl-devel
539
+
540
+ - name: Clone
541
+ id: checkout
542
+ uses: actions/checkout@v4
543
+
544
+ - name: ccache
545
+ uses: hendrikmuhs/[email protected]
546
+ with:
547
+ key: ubuntu-22-cmake-sycl
548
+ evict-old-files: 1d
549
+
550
+ - name: Build
551
+ id: cmake_build
552
+ run: |
553
+ source /opt/intel/oneapi/setvars.sh
554
+ cmake -B build \
555
+ -DGGML_SYCL=ON \
556
+ -DCMAKE_C_COMPILER=icx \
557
+ -DCMAKE_CXX_COMPILER=icpx
558
+ cmake --build build --config Release -j $(nproc)
559
+
560
+ ubuntu-22-cmake-sycl-fp16:
561
+ runs-on: ubuntu-22.04
562
+
563
+ continue-on-error: true
564
+
565
+ steps:
566
+ - uses: actions/checkout@v4
567
+
568
+ - name: add oneAPI to apt
569
+ shell: bash
570
+ run: |
571
+ cd /tmp
572
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
573
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
574
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
575
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
576
+
577
+ - name: install oneAPI dpcpp compiler
578
+ shell: bash
579
+ run: |
580
+ sudo apt update
581
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
582
+
583
+ - name: install oneAPI MKL library
584
+ shell: bash
585
+ run: |
586
+ sudo apt install intel-oneapi-mkl-devel
587
+
588
+ - name: Clone
589
+ id: checkout
590
+ uses: actions/checkout@v4
591
+
592
+ - name: ccache
593
+ uses: hendrikmuhs/[email protected]
594
+ with:
595
+ key: ubuntu-22-cmake-sycl-fp16
596
+ evict-old-files: 1d
597
+
598
+ - name: Build
599
+ id: cmake_build
600
+ run: |
601
+ source /opt/intel/oneapi/setvars.sh
602
+ cmake -B build \
603
+ -DGGML_SYCL=ON \
604
+ -DCMAKE_C_COMPILER=icx \
605
+ -DCMAKE_CXX_COMPILER=icpx \
606
+ -DGGML_SYCL_F16=ON
607
+ cmake --build build --config Release -j $(nproc)
608
+
609
+ macOS-latest-cmake-ios:
610
+ runs-on: macos-latest
611
+
612
+ steps:
613
+ - name: Clone
614
+ id: checkout
615
+ uses: actions/checkout@v4
616
+
617
+ - name: ccache
618
+ uses: hendrikmuhs/[email protected]
619
+ with:
620
+ key: macOS-latest-cmake-ios
621
+ evict-old-files: 1d
622
+
623
+ - name: Dependencies
624
+ id: depends
625
+ continue-on-error: true
626
+ run: |
627
+ brew update
628
+
629
+ - name: Build
630
+ id: cmake_build
631
+ run: |
632
+ sysctl -a
633
+ cmake -B build -G Xcode \
634
+ -DGGML_METAL_USE_BF16=ON \
635
+ -DGGML_METAL_EMBED_LIBRARY=ON \
636
+ -DLLAMA_BUILD_EXAMPLES=OFF \
637
+ -DLLAMA_BUILD_TESTS=OFF \
638
+ -DLLAMA_BUILD_SERVER=OFF \
639
+ -DCMAKE_SYSTEM_NAME=iOS \
640
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
641
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
642
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
643
+
644
+ macOS-latest-cmake-tvos:
645
+ runs-on: macos-latest
646
+
647
+ steps:
648
+ - name: Clone
649
+ id: checkout
650
+ uses: actions/checkout@v4
651
+
652
+ - name: ccache
653
+ uses: hendrikmuhs/[email protected]
654
+ with:
655
+ key: macOS-latest-cmake-tvos
656
+ evict-old-files: 1d
657
+
658
+ - name: Dependencies
659
+ id: depends
660
+ continue-on-error: true
661
+ run: |
662
+ brew update
663
+
664
+ - name: Build
665
+ id: cmake_build
666
+ run: |
667
+ sysctl -a
668
+ cmake -B build -G Xcode \
669
+ -DGGML_METAL_USE_BF16=ON \
670
+ -DGGML_METAL_EMBED_LIBRARY=ON \
671
+ -DLLAMA_BUILD_EXAMPLES=OFF \
672
+ -DLLAMA_BUILD_TESTS=OFF \
673
+ -DLLAMA_BUILD_SERVER=OFF \
674
+ -DCMAKE_SYSTEM_NAME=tvOS \
675
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
676
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
677
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
678
+
679
+ macOS-latest-cmake-visionos:
680
+ runs-on: macos-latest
681
+
682
+ steps:
683
+ - name: Clone
684
+ id: checkout
685
+ uses: actions/checkout@v4
686
+
687
+ - name: Dependencies
688
+ id: depends
689
+ continue-on-error: true
690
+ run: |
691
+ brew update
692
+
693
+ - name: Build
694
+ id: cmake_build
695
+ run: |
696
+ sysctl -a
697
+ cmake -B build -G Xcode \
698
+ -DGGML_METAL_USE_BF16=ON \
699
+ -DGGML_METAL_EMBED_LIBRARY=ON \
700
+ -DLLAMA_BUILD_EXAMPLES=OFF \
701
+ -DLLAMA_BUILD_TESTS=OFF \
702
+ -DLLAMA_BUILD_SERVER=OFF \
703
+ -DCMAKE_SYSTEM_NAME=visionOS \
704
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
705
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
706
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
707
+
708
+ macOS-latest-swift:
709
+ runs-on: macos-latest
710
+
711
+ strategy:
712
+ matrix:
713
+ destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
714
+
715
+ steps:
716
+ - name: Clone
717
+ id: checkout
718
+ uses: actions/checkout@v4
719
+
720
+ - name: ccache
721
+ uses: hendrikmuhs/[email protected]
722
+ with:
723
+ key: macOS-latest-swift
724
+ evict-old-files: 1d
725
+
726
+ - name: Dependencies
727
+ id: depends
728
+ continue-on-error: true
729
+ run: |
730
+ brew update
731
+
732
+ - name: Build llama.cpp with CMake
733
+ id: cmake_build
734
+ run: |
735
+ sysctl -a
736
+ cmake -B build -G Xcode \
737
+ -DGGML_METAL_USE_BF16=ON \
738
+ -DGGML_METAL_EMBED_LIBRARY=ON \
739
+ -DLLAMA_BUILD_EXAMPLES=OFF \
740
+ -DLLAMA_BUILD_TESTS=OFF \
741
+ -DLLAMA_BUILD_SERVER=OFF \
742
+ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
743
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
744
+
745
+ - name: xcodebuild for swift package
746
+ id: xcodebuild
747
+ run: |
748
+ ./build-xcframework.sh
749
+
750
+ windows-msys2:
751
+ runs-on: windows-latest
752
+
753
+ strategy:
754
+ fail-fast: false
755
+ matrix:
756
+ include:
757
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
758
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
759
+
760
+ steps:
761
+ - name: Clone
762
+ uses: actions/checkout@v4
763
+
764
+ - name: ccache
765
+ uses: hendrikmuhs/[email protected]
766
+ with:
767
+ key: windows-msys2
768
+ variant: sccache
769
+ evict-old-files: 1d
770
+
771
+ - name: Setup ${{ matrix.sys }}
772
+ uses: msys2/setup-msys2@v2
773
+ with:
774
+ update: true
775
+ msystem: ${{matrix.sys}}
776
+ install: >-
777
+ base-devel
778
+ git
779
+ mingw-w64-${{matrix.env}}-toolchain
780
+ mingw-w64-${{matrix.env}}-cmake
781
+ mingw-w64-${{matrix.env}}-openblas
782
+
783
+ - name: Build using CMake
784
+ shell: msys2 {0}
785
+ run: |
786
+ cmake -B build
787
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
788
+
789
+ - name: Clean after building using CMake
790
+ shell: msys2 {0}
791
+ run: |
792
+ rm -rf build
793
+
794
+ - name: Build using CMake w/ OpenBLAS
795
+ shell: msys2 {0}
796
+ run: |
797
+ cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
798
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
799
+
800
+ windows-latest-cmake:
801
+ runs-on: windows-latest
802
+
803
+ env:
804
+ OPENBLAS_VERSION: 0.3.23
805
+ SDE_VERSION: 9.33.0-2024-01-07
806
+ VULKAN_VERSION: 1.4.309.0
807
+
808
+ strategy:
809
+ matrix:
810
+ include:
811
+ - build: 'noavx-x64'
812
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
813
+ - build: 'avx2-x64'
814
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
815
+ - build: 'avx-x64'
816
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
817
+ - build: 'avx512-x64'
818
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
819
+ - build: 'openblas-x64'
820
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
821
+ - build: 'kompute-x64'
822
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
823
+ - build: 'vulkan-x64'
824
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
825
+ - build: 'llvm-arm64'
826
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
827
+ - build: 'msvc-arm64'
828
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
829
+ - build: 'llvm-arm64-opencl-adreno'
830
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
831
+
832
+ steps:
833
+ - name: Clone
834
+ id: checkout
835
+ uses: actions/checkout@v4
836
+ with:
837
+ fetch-depth: 0
838
+
839
+ - name: ccache
840
+ uses: hendrikmuhs/[email protected]
841
+ with:
842
+ key: windows-latest-cmake-${{ matrix.build }}
843
+ variant: sccache
844
+ evict-old-files: 1d
845
+
846
+ - name: Clone Kompute submodule
847
+ id: clone_kompute
848
+ if: ${{ matrix.build == 'kompute-x64' }}
849
+ run: |
850
+ git submodule update --init ggml/src/ggml-kompute/kompute
851
+
852
+ - name: Download OpenBLAS
853
+ id: get_openblas
854
+ if: ${{ matrix.build == 'openblas-x64' }}
855
+ run: |
856
+ curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
857
+ curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
858
+ mkdir $env:RUNNER_TEMP/openblas
859
+ tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
860
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
861
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
862
+ $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
863
+ & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
864
+
865
+ - name: Install Vulkan SDK
866
+ id: get_vulkan
867
+ if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
868
+ run: |
869
+ curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
870
+ & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
871
+ Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
872
+ Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
873
+
874
+ - name: Install Ninja
875
+ id: install_ninja
876
+ run: |
877
+ choco install ninja
878
+
879
+ - name: Install OpenCL Headers and Libs
880
+ id: install_opencl
881
+ if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
882
+ run: |
883
+ git clone https://github.com/KhronosGroup/OpenCL-Headers
884
+ cd OpenCL-Headers
885
+ cmake -B build `
886
+ -DBUILD_TESTING=OFF `
887
+ -DOPENCL_HEADERS_BUILD_TESTING=OFF `
888
+ -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
889
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
890
+ cmake --build build --target install
891
+ git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
892
+ cd OpenCL-ICD-Loader
893
+ cmake -B build-arm64-release `
894
+ -A arm64 `
895
+ -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
896
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
897
+ cmake --build build-arm64-release --target install --config release
898
+
899
+ - name: Build
900
+ id: cmake_build
901
+ run: |
902
+ cmake -S . -B build ${{ matrix.defines }}
903
+ cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
904
+
905
+ - name: Add libopenblas.dll
906
+ id: add_libopenblas_dll
907
+ if: ${{ matrix.build == 'openblas-x64' }}
908
+ run: |
909
+ cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
910
+ cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
911
+
912
+ - name: Check AVX512F support
913
+ id: check_avx512f
914
+ if: ${{ matrix.build == 'avx512-x64' }}
915
+ continue-on-error: true
916
+ run: |
917
+ cd build
918
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
919
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
920
+ $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
921
+ echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
922
+ & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
923
+ .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
924
+
925
+ - name: Test
926
+ id: cmake_test
927
+ # not all machines have native AVX-512
928
+ if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
929
+ run: |
930
+ cd build
931
+ ctest -L main -C Release --verbose --timeout 900
932
+
933
+ - name: Test (Intel SDE)
934
+ id: cmake_test_sde
935
+ if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
936
+ run: |
937
+ curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
938
+ # for some weird reason windows tar doesn't like sde tar.xz
939
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
940
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
941
+ $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
942
+ cd build
943
+ $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
944
+ & $sde -future -- ctest -L main -C Release --verbose --timeout 900
945
+
946
+ - name: Determine tag name
947
+ id: tag
948
+ shell: bash
949
+ run: |
950
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
951
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
952
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
953
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
954
+ else
955
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
956
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
957
+ fi
958
+
959
+ - name: Pack artifacts
960
+ id: pack_artifacts
961
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
962
+ run: |
963
+ Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
964
+ Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt
965
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
966
+
967
+ - name: Upload artifacts
968
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
969
+ uses: actions/upload-artifact@v4
970
+ with:
971
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
972
+ name: llama-bin-win-${{ matrix.build }}.zip
973
+
974
+ ubuntu-latest-cmake-cuda:
975
+ runs-on: ubuntu-latest
976
+ container: nvidia/cuda:12.6.2-devel-ubuntu24.04
977
+
978
+ steps:
979
+ - name: Clone
980
+ id: checkout
981
+ uses: actions/checkout@v4
982
+ with:
983
+ fetch-depth: 0
984
+
985
+ - name: Install dependencies
986
+ env:
987
+ DEBIAN_FRONTEND: noninteractive
988
+ run: |
989
+ apt update
990
+ apt install -y cmake build-essential ninja-build libgomp1 git
991
+
992
+ - name: ccache
993
+ uses: hendrikmuhs/[email protected]
994
+ with:
995
+ key: ubuntu-latest-cmake-cuda
996
+ evict-old-files: 1d
997
+
998
+ - name: Build with CMake
999
+ run: |
1000
+ cmake -S . -B build -G Ninja \
1001
+ -DCMAKE_BUILD_TYPE=Release \
1002
+ -DCMAKE_CUDA_ARCHITECTURES=89-real \
1003
+ -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
1004
+ -DLLAMA_FATAL_WARNINGS=ON \
1005
+ -DGGML_NATIVE=OFF \
1006
+ -DGGML_CUDA=ON
1007
+ cmake --build build
1008
+
1009
+ windows-2019-cmake-cuda:
1010
+ runs-on: windows-2019
1011
+
1012
+ strategy:
1013
+ matrix:
1014
+ cuda: ['12.4', '11.7']
1015
+ build: ['cuda']
1016
+
1017
+ steps:
1018
+ - name: Clone
1019
+ id: checkout
1020
+ uses: actions/checkout@v4
1021
+ with:
1022
+ fetch-depth: 0
1023
+
1024
+ - name: Install ccache
1025
+ uses: hendrikmuhs/[email protected]
1026
+ with:
1027
+ key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
1028
+ variant: sccache
1029
+ evict-old-files: 1d
1030
+
1031
+ - name: Install Cuda Toolkit 11.7
1032
+ if: ${{ matrix.cuda == '11.7' }}
1033
+ run: |
1034
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
1035
+ choco install unzip -y
1036
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
1037
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
1038
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
1039
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
1040
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
1041
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
1042
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
1043
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
1044
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
1045
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
1046
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
1047
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
1048
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
1049
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
1050
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
1051
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
1052
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
1053
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
1054
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
1055
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
1056
+ echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
1057
+
1058
+ - name: Install Cuda Toolkit 12.4
1059
+ if: ${{ matrix.cuda == '12.4' }}
1060
+ run: |
1061
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
1062
+ choco install unzip -y
1063
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
1064
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
1065
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
1066
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
1067
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
1068
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
1069
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
1070
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
1071
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
1072
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
1073
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1074
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1075
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1076
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1077
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1078
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1079
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1080
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1081
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1082
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
1083
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
1084
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
1085
+ echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
1086
+
1087
+ - name: Install Ninja
1088
+ id: install_ninja
1089
+ run: |
1090
+ choco install ninja
1091
+
1092
+ - name: Build
1093
+ id: cmake_build
1094
+ shell: cmd
1095
+ run: |
1096
+ call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
1097
+ cmake -S . -B build -G "Ninja Multi-Config" ^
1098
+ -DLLAMA_BUILD_SERVER=ON ^
1099
+ -DGGML_NATIVE=OFF ^
1100
+ -DGGML_CUDA=ON ^
1101
+ -DGGML_RPC=ON
1102
+ set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
1103
+ cmake --build build --config Release -j %NINJA_JOBS% -t ggml
1104
+ cmake --build build --config Release
1105
+
1106
+ - name: Determine tag name
1107
+ id: tag
1108
+ shell: bash
1109
+ run: |
1110
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1111
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1112
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1113
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1114
+ else
1115
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1116
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1117
+ fi
1118
+
1119
+ - name: Pack artifacts
1120
+ id: pack_artifacts
1121
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1122
+ run: |
1123
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
1124
+
1125
+ - name: Upload artifacts
1126
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1127
+ uses: actions/upload-artifact@v4
1128
+ with:
1129
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
1130
+ name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
1131
+
1132
+ - name: Copy and pack Cuda runtime
1133
+ if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
1134
+ run: |
1135
+ echo "Cuda install location: ${{ env.CUDA_PATH }}"
1136
+ $dst='.\build\bin\cudart\'
1137
+ robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
1138
+ robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
1139
+ 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
1140
+
1141
+ - name: Upload Cuda runtime
1142
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1143
+ uses: actions/upload-artifact@v4
1144
+ with:
1145
+ path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
1146
+ name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
1147
+
1148
+ windows-latest-cmake-sycl:
1149
+ runs-on: windows-latest
1150
+
1151
+ defaults:
1152
+ run:
1153
+ shell: bash
1154
+
1155
+ env:
1156
+ WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
1157
+ WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
1158
+ ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
1159
+ steps:
1160
+ - name: Clone
1161
+ id: checkout
1162
+ uses: actions/checkout@v4
1163
+ with:
1164
+ fetch-depth: 0
1165
+
1166
+ - name: ccache
1167
+ uses: hendrikmuhs/[email protected]
1168
+ with:
1169
+ key: windows-latest-cmake-sycl
1170
+ variant: sccache
1171
+ evict-old-files: 1d
1172
+
1173
+ - name: Install
1174
+ run: |
1175
+ scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
1176
+
1177
+ - name: Build
1178
+ id: cmake_build
1179
+ run: examples/sycl/win-build-sycl.bat
1180
+
1181
+ - name: Determine tag name
1182
+ id: tag
1183
+ shell: bash
1184
+ run: |
1185
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1186
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1187
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1188
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1189
+ else
1190
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1191
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1192
+ fi
1193
+
1194
+ - name: Build the release package
1195
+ id: pack_artifacts
1196
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1197
+ run: |
1198
+ echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
1199
+
1200
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
1201
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
1202
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
1203
+
1204
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
1205
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
1206
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
1207
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
1208
+
1209
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
1210
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
1211
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
1212
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
1213
+
1214
+ cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
1215
+ cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
1216
+
1217
+ echo "cp oneAPI running time dll files to ./build/bin done"
1218
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
1219
+
1220
+ - name: Upload the release package
1221
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1222
+ uses: actions/upload-artifact@v4
1223
+ with:
1224
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
1225
+ name: llama-bin-win-sycl-x64.zip
1226
+
1227
+ windows-latest-cmake-hip:
1228
+ if: ${{ github.event.inputs.create_release != 'true' }}
1229
+ runs-on: windows-latest
1230
+
1231
+ steps:
1232
+ - name: Clone
1233
+ id: checkout
1234
+ uses: actions/checkout@v4
1235
+
1236
+ - name: Clone rocWMMA repository
1237
+ id: clone_rocwmma
1238
+ run: |
1239
+ git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
1240
+
1241
+ - name: Install
1242
+ id: depends
1243
+ run: |
1244
+ $ErrorActionPreference = "Stop"
1245
+ write-host "Downloading AMD HIP SDK Installer"
1246
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1247
+ write-host "Installing AMD HIP SDK"
1248
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
1249
+ write-host "Completed AMD HIP SDK installation"
1250
+
1251
+ - name: Verify ROCm
1252
+ id: verify
1253
+ run: |
1254
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
1255
+
1256
+ - name: Install ccache
1257
+ uses: hendrikmuhs/[email protected]
1258
+ with:
1259
+ key: ${{ github.job }}
1260
+ evict-old-files: 1d
1261
+
1262
+ - name: Build
1263
+ id: cmake_build
1264
+ run: |
1265
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
1266
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1267
+ cmake -G "Unix Makefiles" -B build -S . `
1268
+ -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
1269
+ -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
1270
+ -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
1271
+ -DCMAKE_BUILD_TYPE=Release `
1272
+ -DGGML_HIP=ON `
1273
+ -DGGML_HIP_ROCWMMA_FATTN=ON `
1274
+ -DGGML_RPC=ON
1275
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1276
+
1277
+ windows-latest-cmake-hip-release:
1278
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1279
+ runs-on: windows-latest
1280
+
1281
+ strategy:
1282
+ matrix:
1283
+ gpu_target: [gfx1100, gfx1101, gfx1030]
1284
+
1285
+ steps:
1286
+ - name: Clone
1287
+ id: checkout
1288
+ uses: actions/checkout@v4
1289
+ with:
1290
+ fetch-depth: 0
1291
+
1292
+ - name: Clone rocWMMA repository
1293
+ id: clone_rocwmma
1294
+ run: |
1295
+ git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
1296
+
1297
+ - name: ccache
1298
+ uses: hendrikmuhs/[email protected]
1299
+ with:
1300
+ key: windows-latest-cmake-hip-release
1301
+ evict-old-files: 1d
1302
+
1303
+ - name: Install
1304
+ id: depends
1305
+ run: |
1306
+ $ErrorActionPreference = "Stop"
1307
+ write-host "Downloading AMD HIP SDK Installer"
1308
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1309
+ write-host "Installing AMD HIP SDK"
1310
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
1311
+ write-host "Completed AMD HIP SDK installation"
1312
+
1313
+ - name: Verify ROCm
1314
+ id: verify
1315
+ run: |
1316
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
1317
+
1318
+ - name: Build
1319
+ id: cmake_build
1320
+ run: |
1321
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
1322
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1323
+ cmake -G "Unix Makefiles" -B build -S . `
1324
+ -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
1325
+ -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
1326
+ -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
1327
+ -DCMAKE_BUILD_TYPE=Release `
1328
+ -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
1329
+ -DGGML_HIP_ROCWMMA_FATTN=ON `
1330
+ -DGGML_HIP=ON `
1331
+ -DGGML_RPC=ON
1332
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1333
+ md "build\bin\rocblas\library\"
1334
+ cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
1335
+ cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
1336
+ cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
1337
+
1338
+ - name: Determine tag name
1339
+ id: tag
1340
+ shell: bash
1341
+ run: |
1342
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1343
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1344
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1345
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1346
+ else
1347
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1348
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1349
+ fi
1350
+
1351
+ - name: Pack artifacts
1352
+ id: pack_artifacts
1353
+ run: |
1354
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
1355
+
1356
+ - name: Upload artifacts
1357
+ uses: actions/upload-artifact@v4
1358
+ with:
1359
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
1360
+ name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
1361
+
1362
+ ios-xcode-build:
1363
+ runs-on: macos-latest
1364
+
1365
+ steps:
1366
+ - name: Checkout code
1367
+ uses: actions/checkout@v4
1368
+ with:
1369
+ fetch-depth: 0
1370
+
1371
+ - name: Build
1372
+ id: cmake_build
1373
+ run: |
1374
+ sysctl -a
1375
+ cmake -B build -G Xcode \
1376
+ -DGGML_METAL_USE_BF16=ON \
1377
+ -DGGML_METAL_EMBED_LIBRARY=ON \
1378
+ -DLLAMA_BUILD_EXAMPLES=OFF \
1379
+ -DLLAMA_BUILD_TESTS=OFF \
1380
+ -DLLAMA_BUILD_SERVER=OFF \
1381
+ -DCMAKE_SYSTEM_NAME=iOS \
1382
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
1383
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
1384
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
1385
+
1386
+ - name: xcodebuild for swift package
1387
+ id: xcodebuild
1388
+ run: |
1389
+ ./build-xcframework.sh
1390
+
1391
+ - name: Build Xcode project
1392
+ run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
1393
+
1394
+ - name: Determine tag name
1395
+ id: tag
1396
+ shell: bash
1397
+ run: |
1398
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1399
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1400
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1401
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1402
+ else
1403
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1404
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1405
+ fi
1406
+
1407
+ - name: Pack artifacts
1408
+ id: pack_artifacts
1409
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1410
+ run: |
1411
+ zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
1412
+
1413
+ - name: Upload artifacts
1414
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1415
+ uses: actions/upload-artifact@v4
1416
+ with:
1417
+ path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
1418
+ name: llama-${{ steps.tag.outputs.name }}-xcframework
1419
+
1420
+ android-build:
1421
+ runs-on: ubuntu-latest
1422
+
1423
+ steps:
1424
+ - name: Clone
1425
+ uses: actions/checkout@v4
1426
+
1427
+ - name: ccache
1428
+ uses: hendrikmuhs/[email protected]
1429
+ with:
1430
+ key: android-build
1431
+ evict-old-files: 1d
1432
+
1433
+ - name: Set up JDK
1434
+ uses: actions/setup-java@v3
1435
+ with:
1436
+ java-version: 17
1437
+ distribution: zulu
1438
+
1439
+ - name: Setup Android SDK
1440
+ uses: android-actions/setup-android@v3
1441
+ with:
1442
+ log-accepted-android-sdk-licenses: false
1443
+
1444
+ - name: Build
1445
+ run: |
1446
+ cd examples/llama.android
1447
+
1448
+ ./gradlew build --no-daemon
1449
+
1450
+ release:
1451
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1452
+
1453
+ runs-on: ubuntu-latest
1454
+
1455
+ needs:
1456
+ - ubuntu-cpu-cmake
1457
+ - ubuntu-22-cmake-vulkan
1458
+ - windows-latest-cmake
1459
+ - windows-2019-cmake-cuda
1460
+ - windows-latest-cmake-sycl
1461
+ - windows-latest-cmake-hip-release
1462
+ - macOS-latest-cmake-arm64
1463
+ - macOS-latest-cmake-x64
1464
+
1465
+ steps:
1466
+ - name: Clone
1467
+ id: checkout
1468
+ uses: actions/checkout@v4
1469
+ with:
1470
+ fetch-depth: 0
1471
+
1472
+ - name: ccache
1473
+ uses: hendrikmuhs/[email protected]
1474
+ with:
1475
+ key: release
1476
+ evict-old-files: 1d
1477
+
1478
+ - name: Determine tag name
1479
+ id: tag
1480
+ shell: bash
1481
+ run: |
1482
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1483
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1484
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1485
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1486
+ else
1487
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1488
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1489
+ fi
1490
+
1491
+ - name: Download artifacts
1492
+ id: download-artifact
1493
+ uses: actions/download-artifact@v4
1494
+ with:
1495
+ path: ./artifact
1496
+
1497
+ - name: Move artifacts
1498
+ id: move_artifacts
1499
+ run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
1500
+
1501
+ - name: Create release
1502
+ id: create_release
1503
+ uses: ggml-org/action-create-release@v1
1504
+ env:
1505
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1506
+ with:
1507
+ tag_name: ${{ steps.tag.outputs.name }}
1508
+
1509
+ - name: Upload release
1510
+ id: upload_release
1511
+ uses: actions/github-script@v3
1512
+ with:
1513
+ github-token: ${{secrets.GITHUB_TOKEN}}
1514
+ script: |
1515
+ const path = require('path');
1516
+ const fs = require('fs');
1517
+ const release_id = '${{ steps.create_release.outputs.id }}';
1518
+ for (let file of await fs.readdirSync('./artifact/release')) {
1519
+ if (path.extname(file) === '.zip') {
1520
+ console.log('uploadReleaseAsset', file);
1521
+ await github.repos.uploadReleaseAsset({
1522
+ owner: context.repo.owner,
1523
+ repo: context.repo.repo,
1524
+ release_id: release_id,
1525
+ name: file,
1526
+ data: await fs.readFileSync(`./artifact/release/${file}`)
1527
+ });
1528
+ }
1529
+ }
1530
+
1531
+ # ubuntu-latest-gcc:
1532
+ # runs-on: ubuntu-latest
1533
+ #
1534
+ # strategy:
1535
+ # matrix:
1536
+ # build: [Debug, Release]
1537
+ #
1538
+ # steps:
1539
+ # - name: Clone
1540
+ # uses: actions/checkout@v4
1541
+ #
1542
+ # - name: Dependencies
1543
+ # run: |
1544
+ # sudo apt-get update
1545
+ # sudo apt-get install build-essential
1546
+ # sudo apt-get install cmake
1547
+ #
1548
+ # - name: Configure
1549
+ # run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1550
+ #
1551
+ # - name: Build
1552
+ # run: |
1553
+ # make
1554
+ #
1555
+ # ubuntu-latest-clang:
1556
+ # runs-on: ubuntu-latest
1557
+ #
1558
+ # strategy:
1559
+ # matrix:
1560
+ # build: [Debug, Release]
1561
+ #
1562
+ # steps:
1563
+ # - name: Clone
1564
+ # uses: actions/checkout@v4
1565
+ #
1566
+ # - name: Dependencies
1567
+ # run: |
1568
+ # sudo apt-get update
1569
+ # sudo apt-get install build-essential
1570
+ # sudo apt-get install cmake
1571
+ #
1572
+ # - name: Configure
1573
+ # run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
1574
+ #
1575
+ # - name: Build
1576
+ # run: |
1577
+ # make
1578
+ #
1579
+ # ubuntu-latest-gcc-sanitized:
1580
+ # runs-on: ubuntu-latest
1581
+ #
1582
+ # strategy:
1583
+ # matrix:
1584
+ # sanitizer: [ADDRESS, THREAD, UNDEFINED]
1585
+ #
1586
+ # steps:
1587
+ # - name: Clone
1588
+ # uses: actions/checkout@v4
1589
+ #
1590
+ # - name: Dependencies
1591
+ # run: |
1592
+ # sudo apt-get update
1593
+ # sudo apt-get install build-essential
1594
+ # sudo apt-get install cmake
1595
+ #
1596
+ # - name: Configure
1597
+ # run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
1598
+ #
1599
+ # - name: Build
1600
+ # run: |
1601
+ # make
1602
+ #
1603
+ # windows:
1604
+ # runs-on: windows-latest
1605
+ #
1606
+ # strategy:
1607
+ # matrix:
1608
+ # build: [Release]
1609
+ # arch: [Win32, x64]
1610
+ # include:
1611
+ # - arch: Win32
1612
+ # s2arc: x86
1613
+ # - arch: x64
1614
+ # s2arc: x64
1615
+ #
1616
+ # steps:
1617
+ # - name: Clone
1618
+ # uses: actions/checkout@v4
1619
+ #
1620
+ # - name: Add msbuild to PATH
1621
+ # uses: microsoft/setup-msbuild@v1
1622
+ #
1623
+ # - name: Configure
1624
+ # run: >
1625
+ # cmake -S . -B ./build -A ${{ matrix.arch }}
1626
+ # -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1627
+ #
1628
+ # - name: Build
1629
+ # run: |
1630
+ # cd ./build
1631
+ # msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
1632
+ #
1633
+ # - name: Upload binaries
1634
+ # uses: actions/upload-artifact@v4
1635
+ # with:
1636
+ # name: llama-bin-${{ matrix.arch }}
1637
+ # path: build/bin/${{ matrix.build }}
1638
+ #
1639
+ # windows-blas:
1640
+ # runs-on: windows-latest
1641
+ #
1642
+ # strategy:
1643
+ # matrix:
1644
+ # build: [Release]
1645
+ # arch: [Win32, x64]
1646
+ # blas: [ON]
1647
+ # include:
1648
+ # - arch: Win32
1649
+ # obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
1650
+ # s2arc: x86
1651
+ # - arch: x64
1652
+ # obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
1653
+ # s2arc: x64
1654
+ #
1655
+ # steps:
1656
+ # - name: Clone
1657
+ # uses: actions/checkout@v4
1658
+ #
1659
+ # - name: Add msbuild to PATH
1660
+ # uses: microsoft/setup-msbuild@v1
1661
+ #
1662
+ # - name: Fetch OpenBLAS
1663
+ # if: matrix.blas == 'ON'
1664
+ # run: |
1665
+ # C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
1666
+ # 7z x blas.zip -oblas -y
1667
+ # copy blas/include/cblas.h .
1668
+ # copy blas/include/openblas_config.h .
1669
+ # echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
1670
+ #
1671
+ # - name: Configure
1672
+ # run: >
1673
+ # cmake -S . -B ./build -A ${{ matrix.arch }}
1674
+ # -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1675
+ # -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
1676
+ # -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
1677
+ #
1678
+ # - name: Build
1679
+ # run: |
1680
+ # cd ./build
1681
+ # msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
1682
+ #
1683
+ # - name: Copy libopenblas.dll
1684
+ # if: matrix.blas == 'ON'
1685
+ # run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
1686
+ #
1687
+ # - name: Upload binaries
1688
+ # if: matrix.blas == 'ON'
1689
+ # uses: actions/upload-artifact@v4
1690
+ # with:
1691
+ # name: llama-blas-bin-${{ matrix.arch }}
1692
+ # path: build/bin/${{ matrix.build }}
1693
+ #
1694
+ # emscripten:
1695
+ # runs-on: ubuntu-latest
1696
+ #
1697
+ # strategy:
1698
+ # matrix:
1699
+ # build: [Release]
1700
+ #
1701
+ # steps:
1702
+ # - name: Clone
1703
+ # uses: actions/checkout@v4
1704
+ #
1705
+ # - name: Dependencies
1706
+ # run: |
1707
+ # wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
1708
+ # tar -xvf master.tar.gz
1709
+ # emsdk-master/emsdk update
1710
+ # emsdk-master/emsdk install latest
1711
+ # emsdk-master/emsdk activate latest
1712
+ #
1713
+ # - name: Configure
1714
+ # run: echo "tmp"
1715
+ #
1716
+ # - name: Build
1717
+ # run: |
1718
+ # pushd emsdk-master
1719
+ # source ./emsdk_env.sh
1720
+ # popd
1721
+ # emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1722
+ # make
1723
+
1724
+ openEuler-latest-cmake-cann:
1725
+ if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
1726
+ defaults:
1727
+ run:
1728
+ shell: bash -el {0}
1729
+ runs-on: ubuntu-24.04-arm
1730
+ strategy:
1731
+ matrix:
1732
+ cann:
1733
+ - '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
1734
+ device:
1735
+ - 'ascend910b3'
1736
+ build:
1737
+ - 'Release'
1738
+ container: ascendai/cann:${{ matrix.cann }}
1739
+ steps:
1740
+ - name: Checkout
1741
+ uses: actions/checkout@v4
1742
+
1743
+ - name: Dependencies
1744
+ run: |
1745
+ yum update -y
1746
+ yum install -y git gcc gcc-c++ make cmake
1747
+
1748
+ - name: Build
1749
+ run: |
1750
+ export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
1751
+
1752
+ cmake -S . -B build \
1753
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
1754
+ -DGGML_CANN=on \
1755
+ -DSOC_TYPE=${{ matrix.device }}
1756
+ cmake --build build -j $(nproc)
.github/workflows/close-issue.yml ADDED
@@ -0,0 +1,28 @@
1
+ name: Close inactive issues
2
+ on:
3
+ schedule:
4
+ - cron: "42 0 * * *"
5
+
6
+ # Fine-grained permission
7
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
8
+ permissions:
9
+ issues: write
10
+
11
+ jobs:
12
+ close-issues:
13
+ runs-on: ubuntu-latest
14
+ permissions:
15
+ issues: write
16
+ pull-requests: write
17
+ steps:
18
+ - uses: actions/stale@v5
19
+ with:
20
+ exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
21
+ days-before-issue-stale: 30
22
+ days-before-issue-close: 14
23
+ stale-issue-label: "stale"
24
+ close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
25
+ days-before-pr-stale: -1
26
+ days-before-pr-close: -1
27
+ operations-per-run: 10000
28
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
.github/workflows/docker.yml ADDED
@@ -0,0 +1,175 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+
6
+ # GitHub recommends pinning actions to a commit SHA.
7
+ # To get a newer version, you will need to update the SHA.
8
+ # You can also reference a tag or branch, but the action may change without warning.
9
+
10
+ name: Publish Docker image
11
+
12
+ on:
13
+ workflow_dispatch: # allows manual triggering
14
+ schedule:
15
+ # Rebuild daily rather than on every push because it is expensive
16
+ - cron: '12 4 * * *'
17
+
18
+ concurrency:
19
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
20
+ cancel-in-progress: true
21
+
22
+ # Fine-grained permission
23
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
24
+ permissions:
25
+ packages: write
26
+
27
+ jobs:
28
+ push_to_registry:
29
+ name: Push Docker image to Docker Hub
30
+
31
+ runs-on: ubuntu-22.04
32
+ env:
33
+ COMMIT_SHA: ${{ github.sha }}
34
+ strategy:
35
+ fail-fast: false
36
+ matrix:
37
+ config:
38
+ # Multi-stage build
39
+ - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
40
+ - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
41
+ - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
42
+ - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
43
+ - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
44
+ # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
45
+ #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
46
+ steps:
47
+ - name: Check out the repo
48
+ uses: actions/checkout@v4
49
+ with:
50
+ fetch-depth: 0 # preserve git history, so we can determine the build number
51
+
52
+ - name: Set up QEMU
53
+ uses: docker/setup-qemu-action@v3
54
+ with:
55
+ image: tonistiigi/binfmt:qemu-v7.0.0-28
56
+
57
+ - name: Set up Docker Buildx
58
+ uses: docker/setup-buildx-action@v3
59
+
60
+ - name: Log in to Docker Hub
61
+ uses: docker/login-action@v2
62
+ with:
63
+ registry: ghcr.io
64
+ username: ${{ github.repository_owner }}
65
+ password: ${{ secrets.GITHUB_TOKEN }}
66
+
67
+ - name: Determine tag name
68
+ id: tag
69
+ shell: bash
70
+ run: |
71
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
72
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
73
+ REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
74
+ REPO_NAME="${{ github.event.repository.name }}"
75
+
76
+ # determine tag name postfix (build number, commit hash)
77
+ if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
78
+ TAG_POSTFIX="-b${BUILD_NUMBER}"
79
+ else
80
+ SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
81
+ TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
82
+ fi
83
+ # list all tags possible
84
+ if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
85
+ TYPE=""
86
+ else
87
+ TYPE="-${{ matrix.config.tag }}"
88
+ fi
89
+ PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
90
+ FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
91
+ LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
92
+ SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
93
+ echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
94
+ echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
95
+ echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
96
+ echo "full_output_tags=$FULLTAGS" # print out for debugging
97
+ echo "light_output_tags=$LIGHTTAGS" # print out for debugging
98
+ echo "server_output_tags=$SERVERTAGS" # print out for debugging
99
+ env:
100
+ GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
101
+ GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
102
+
103
+ - name: Free Disk Space (Ubuntu)
104
+ if: ${{ matrix.config.freediskspace == true }}
105
+ uses: ggml-org/[email protected]
106
+ with:
107
+ # this might remove tools that are actually needed,
108
+ # if set to "true" but frees about 6 GB
109
+ tool-cache: false
110
+
111
+ # all of these default to true, but feel free to set to
112
+ # "false" if necessary for your workflow
113
+ android: true
114
+ dotnet: true
115
+ haskell: true
116
+ large-packages: true
117
+ docker-images: true
118
+ swap-storage: true
119
+
120
+ - name: Build and push Full Docker image (tagged + versioned)
121
+ if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
122
+ uses: docker/build-push-action@v6
123
+ with:
124
+ context: .
125
+ push: true
126
+ platforms: ${{ matrix.config.platforms }}
127
+ # tag list is generated from step above
128
+ tags: ${{ steps.tag.outputs.full_output_tags }}
129
+ file: ${{ matrix.config.dockerfile }}
130
+ target: full
131
+ provenance: false
132
+ # using github experimental cache
133
+ cache-from: type=gha
134
+ cache-to: type=gha,mode=max
135
+ # return to this if the experimental github cache is having issues
136
+ #cache-to: type=local,dest=/tmp/.buildx-cache
137
+ #cache-from: type=local,src=/tmp/.buildx-cache
138
+
139
+ - name: Build and push Light Docker image (tagged + versioned)
140
+ if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
141
+ uses: docker/build-push-action@v6
142
+ with:
143
+ context: .
144
+ push: true
145
+ platforms: ${{ matrix.config.platforms }}
146
+ # tag list is generated from step above
147
+ tags: ${{ steps.tag.outputs.light_output_tags }}
148
+ file: ${{ matrix.config.dockerfile }}
149
+ target: light
150
+ provenance: false
151
+ # using github experimental cache
152
+ cache-from: type=gha
153
+ cache-to: type=gha,mode=max
154
+ # return to this if the experimental github cache is having issues
155
+ #cache-to: type=local,dest=/tmp/.buildx-cache
156
+ #cache-from: type=local,src=/tmp/.buildx-cache
157
+
158
+ - name: Build and push Server Docker image (tagged + versioned)
159
+ if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
160
+ uses: docker/build-push-action@v6
161
+ with:
162
+ context: .
163
+ push: true
164
+ platforms: ${{ matrix.config.platforms }}
165
+ # tag list is generated from step above
166
+ tags: ${{ steps.tag.outputs.server_output_tags }}
167
+ file: ${{ matrix.config.dockerfile }}
168
+ target: server
169
+ provenance: false
170
+ # using github experimental cache
171
+ cache-from: type=gha
172
+ cache-to: type=gha,mode=max
173
+ # return to this if the experimental github cache is having issues
174
+ #cache-to: type=local,dest=/tmp/.buildx-cache
175
+ #cache-from: type=local,src=/tmp/.buildx-cache
.github/workflows/editorconfig.yml ADDED
@@ -0,0 +1,29 @@
1
+ name: EditorConfig Checker
2
+
3
+ on:
4
+ workflow_dispatch: # allows manual triggering
5
+ inputs:
6
+ create_release:
7
+ description: 'Create new release'
8
+ required: true
9
+ type: boolean
10
+ push:
11
+ branches:
12
+ - master
13
+ pull_request:
14
+ branches:
15
+ - master
16
+
17
+ concurrency:
18
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
19
+ cancel-in-progress: true
20
+
21
+ jobs:
22
+ editorconfig:
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - uses: editorconfig-checker/action-editorconfig-checker@v2
27
+ with:
28
+ version: v3.0.3
29
+ - run: editorconfig-checker
.github/workflows/gguf-publish.yml ADDED
@@ -0,0 +1,44 @@
1
+ # This workflow will upload a Python Package using Twine when a GGUF release is created
2
+ # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3
+
4
+ # See `gguf-py/README.md` for how to make a release.
5
+
6
+ # This workflow uses actions that are not certified by GitHub.
7
+ # They are provided by a third-party and are governed by
8
+ # separate terms of service, privacy policy, and support
9
+ # documentation.
10
+
11
+ name: Upload Python Package
12
+
13
+ on:
14
+ workflow_dispatch:
15
+ push:
16
+ # Pattern matched against refs/tags
17
+ tags:
18
+ - 'gguf-v*' # Push events to every version tag
19
+
20
+
21
+ jobs:
22
+ deploy:
23
+
24
+ runs-on: ubuntu-latest
25
+
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - name: Set up Python
29
+ uses: actions/setup-python@v5
30
+ with:
31
+ python-version: '3.9.x'
32
+ - name: Install dependencies
33
+ run: |
34
+ cd gguf-py
35
+ python -m pip install poetry
36
+ poetry install
37
+
38
+ - name: Build package
39
+ run: cd gguf-py && poetry build
40
+ - name: Publish package
41
+ uses: pypa/gh-action-pypi-publish@release/v1
42
+ with:
43
+ password: ${{ secrets.PYPI_API_TOKEN }}
44
+ packages-dir: gguf-py/dist
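
Note: per the comments at the top of this workflow, publishing happens whenever a tag matching gguf-v* is pushed; the actual release procedure is documented in gguf-py/README.md. A minimal sketch of what triggers it (the version number below is a placeholder, not a real release):

    # tag and push a gguf-py release (placeholder version)
    git tag gguf-v0.0.0
    git push origin gguf-v0.0.0

    # the workflow then effectively builds the package with:
    cd gguf-py && poetry build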
.github/workflows/labeler.yml ADDED
@@ -0,0 +1,17 @@
+name: "Pull Request Labeler"
+on:
+- pull_request_target
+
+jobs:
+  labeler:
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        repository: "ggml-org/llama.cpp"
+    - uses: actions/labeler@v5
+      with:
+        configuration-path: '.github/labeler.yml'
.github/workflows/python-check-requirements.yml ADDED
@@ -0,0 +1,33 @@
+name: Python check requirements.txt
+
+on:
+  push:
+    paths:
+      - '.github/workflows/python-check-requirements.yml'
+      - 'scripts/check-requirements.sh'
+      - 'convert*.py'
+      - '**/requirements*.txt'
+  pull_request:
+    paths:
+      - '.github/workflows/python-check-requirements.yml'
+      - 'scripts/check-requirements.sh'
+      - 'convert*.py'
+      - '**/requirements*.txt'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  python-check-requirements:
+    runs-on: ubuntu-latest
+    name: check-requirements
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v4
+      - name: Set up Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Run check-requirements.sh script
+        run: bash scripts/check-requirements.sh
.github/workflows/python-lint.yml ADDED
@@ -0,0 +1,30 @@
+name: flake8 Lint
+
+on:
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  flake8-lint:
+    runs-on: ubuntu-latest
+    name: Lint
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v4
+      - name: Set up Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: flake8 Lint
+        uses: py-actions/flake8@v2
+        with:
+          plugins: "flake8-no-print"
.github/workflows/python-type-check.yml ADDED
@@ -0,0 +1,40 @@
+name: Python Type-Check
+
+on:
+  push:
+    paths:
+      - '.github/workflows/python-type-check.yml'
+      - 'pyrightconfig.json'
+      - '**.py'
+      - '**/requirements*.txt'
+  pull_request:
+    paths:
+      - '.github/workflows/python-type-check.yml'
+      - 'pyrightconfig.json'
+      - '**.py'
+      - '**/requirements*.txt'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  python-type-check:
+    runs-on: ubuntu-latest
+    name: pyright type-check
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v4
+      - name: Set up Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install Python dependencies
+        # TODO: use a venv
+        run: pip install -r requirements/requirements-all.txt
+      - name: Type-check with Pyright
+        uses: jakebailey/pyright-action@v2
+        with:
+          version: 1.1.382
+          level: warning
+          warnings: true
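
Note: the CI runs pyright through the jakebailey/pyright-action wrapper; a rough local sketch of the same check, assuming pyright is installed separately (installation method below is an assumption, not part of this workflow):

    # install the project requirements used by the type check, then run pyright
    pip install -r requirements/requirements-all.txt
    pip install pyright==1.1.382
    pyright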
.github/workflows/server.yml ADDED
@@ -0,0 +1,241 @@
+# Server build and tests
+name: Server
+
+on:
+  workflow_dispatch: # allows manual triggering
+    inputs:
+      sha:
+        description: 'Commit SHA1 to build'
+        required: false
+        type: string
+      slow_tests:
+        description: 'Run slow tests'
+        required: true
+        type: boolean
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+
+env:
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+  LLAMA_LOG_VERBOSITY: 10
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  server:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
+        build_type: [RelWithDebInfo]
+        include:
+          - build_type: Release
+            sanitizer: ""
+      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
+
+    steps:
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install \
+            build-essential \
+            xxd \
+            git \
+            cmake \
+            curl \
+            wget \
+            language-pack-en \
+            libcurl4-openssl-dev
+
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      # Setup nodejs (to be used for verifying bundled index.html)
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22.11.0'
+
+      - name: WebUI - Install dependencies
+        id: webui_lint
+        run: |
+          cd examples/server/webui
+          npm ci
+
+      - name: WebUI - Check code format
+        id: webui_format
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+
+          npm run format
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Files do not follow coding style. To fix: npm run format"
+            echo "${modified_files}"
+            exit 1
+          fi
+
+      - name: Verify bundled index.html
+        id: verify_server_index_html
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+
+          npm run build
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Repository is dirty or server/webui is not built as expected"
+            echo "Hint: You may need to follow Web UI build guide in server/README.md"
+            echo "${modified_files}"
+            exit 1
+          fi
+
+      - name: Build (no OpenMP)
+        id: cmake_build_no_openmp
+        if: ${{ matrix.sanitizer == 'THREAD' }}
+        run: |
+          cmake -B build \
+            -DGGML_NATIVE=OFF \
+            -DLLAMA_BUILD_SERVER=ON \
+            -DLLAMA_CURL=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+            -DGGML_OPENMP=OFF ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Build (sanitizers)
+        id: cmake_build_sanitizers
+        if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
+        run: |
+          cmake -B build \
+            -DGGML_NATIVE=OFF \
+            -DLLAMA_BUILD_SERVER=ON \
+            -DLLAMA_CURL=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Build (sanitizers)
+        id: cmake_build
+        if: ${{ matrix.sanitizer == '' }}
+        run: |
+          cmake -B build \
+            -DGGML_NATIVE=OFF \
+            -DLLAMA_BUILD_SERVER=ON \
+            -DLLAMA_CURL=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Tests
+        id: server_integration_tests
+        if: ${{ matrix.sanitizer == '' }}
+        env:
+          GITHUB_ACTIONS: "true"
+        run: |
+          cd examples/server/tests
+          ./tests.sh
+
+      - name: Tests (sanitizers)
+        id: server_integration_tests_sanitizers
+        if: ${{ matrix.sanitizer != '' }}
+        run: |
+          cd examples/server/tests
+          LLAMA_SANITIZE=1 ./tests.sh
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
+        run: |
+          cd examples/server/tests
+          SLOW_TESTS=1 ./tests.sh
+
+
+  server-windows:
+    runs-on: windows-2019
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: libCURL
+        id: get_libcurl
+        env:
+          CURL_VERSION: 8.6.0_6
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
+          mkdir $env:RUNNER_TEMP/libcurl
+          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Copy Libcurl
+        id: prepare_libcurl
+        run: |
+          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
+
+      - name: Tests
+        id: server_integration_tests
+        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
+        run: |
+          cd examples/server/tests
+          $env:PYTHONIOENCODING = ":replace"
+          pytest -v -x -m "not slow"
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
+        run: |
+          cd examples/server/tests
+          $env:SLOW_TESTS = "1"
+          pytest -v -x
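
Note: for local debugging, the Linux job above reduces to a CMake build of the llama-server target followed by the test suite in examples/server/tests; a condensed sketch using the same flags as the no-sanitizer configuration (run from the repository root):

    # build llama-server and run the server integration tests locally
    cmake -B build -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release
    cmake --build build --config Release -j $(nproc) --target llama-server
    cd examples/server/tests
    ./tests.sh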
.gitignore ADDED
@@ -0,0 +1,149 @@
+# Extensions
+
+*.a
+*.bat
+*.bin
+*.d
+*.dll
+*.dot
+*.etag
+*.exe
+*.gcda
+*.gcno
+*.gcov
+*.gguf
+*.gguf.json
+*.lastModified
+*.log
+*.metallib
+*.o
+*.so
+*.swp
+*.tmp
+
+# IDE / OS
+
+.cache/
+.ccls-cache/
+.direnv/
+.DS_Store
+.envrc
+.idea/
+.swiftpm
+.vs/
+.vscode/
+nppBackup
+
+
+# Coverage
+
+gcovr-report/
+lcov-report/
+
+# Build Artifacts
+
+tags
+.build/
+build*
+release
+debug
+!build-info.cmake
+!build-info.cpp.in
+!build-info.sh
+!build.zig
+!docs/build.md
+/libllama.so
+/llama-*
+/vulkan-shaders-gen
+android-ndk-*
+arm_neon.h
+cmake-build-*
+CMakeSettings.json
+compile_commands.json
+ggml-metal-embed.metal
+llama-batched-swift
+/rpc-server
+out/
+tmp/
+autogen-*.md
+
+# Deprecated
+
+/main
+/server
+
+# CI
+
+!.github/workflows/*.yml
+
+# Models
+
+models/*
+models-mnt
+!models/.editorconfig
+!models/ggml-vocab-*.gguf*
+
+# Zig
+zig-out/
+zig-cache/
+
+# Logs
+
+ppl-*.txt
+qnt-*.txt
+perf-*.txt
+
+# Examples
+
+examples/jeopardy/results.txt
+examples/server/*.css.hpp
+examples/server/*.html.hpp
+examples/server/*.js.hpp
+examples/server/*.mjs.hpp
+examples/server/*.gz.hpp
+!build_64.sh
+!examples/*.bat
+!examples/*/*.kts
+!examples/*/*/*.kts
+!examples/sycl/*.bat
+!examples/sycl/*.sh
+/*.wav
+
+# Server Web UI temporary files
+node_modules
+examples/server/webui/dist
+
+# Python
+
+/.venv
+__pycache__/
+*/poetry.lock
+poetry.toml
+
+# Nix
+/result
+
+# Test binaries
+/tests/test-backend-ops
+/tests/test-double-float
+/tests/test-grad0
+/tests/test-grammar-parser
+/tests/test-llama-grammar
+/tests/test-opt
+/tests/test-quantize-fns
+/tests/test-quantize-perf
+/tests/test-rope
+/tests/test-sampling
+/tests/test-tokenizer-0
+/tests/test-tokenizer-1-bpe
+/tests/test-tokenizer-1-spm
+
+# Scripts
+!/scripts/install-oneapi.bat
+
+# Test models for lora adapters
+/lora-tests
+
+# Local scripts
+/run-vim.sh
+/run-chat.sh
.gitmodules ADDED
@@ -0,0 +1,3 @@
+[submodule "kompute"]
+    path = ggml/src/ggml-kompute/kompute
+    url = https://github.com/nomic-ai/kompute.git