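# SRPM spec for llama.cpp with the CUDA backend.
# It builds the master branch from GitHub, installs the CLI, server, and
# "simple" example binaries under llama-cuda-* names, and ships a systemd
# unit plus an /etc/sysconfig/llama environment file for the server.
# The package version is simply the build date (YYYYMMDD).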
Name: llama.cpp-cuda
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CUDA Inference of LLaMA model in C/C++
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git cuda-toolkit
Requires: cuda-toolkit
URL: https://github.com/ggerganov/llama.cpp
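
# Skip the debuginfo subpackage and do not derive SOURCE_DATE_EPOCH from the
# (empty) changelog section at the end of this spec.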
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CUDA inference for Meta's LLaMA 2 models using default options.
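
# Unpack the GitHub master-branch tarball, which extracts to llama.cpp-master.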
%prep
%setup -n llama.cpp-master
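
# Build with the CUDA backend enabled. This requires the NVIDIA CUDA toolkit
# (nvcc, cuBLAS) declared in BuildRequires above.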
%build
make -j GGML_CUDA=1
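
# Install the binaries under llama-cuda-* names so they can live alongside a
# CPU-only llama.cpp package on the same system.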
%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
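
# Ship a systemd unit for the server. The heredoc delimiter is quoted so the
# $LLAMA_ARGS and $MAINPID references reach the unit file literally and are
# expanded by systemd at runtime, not by the shell at build time. After
# installation the service can be enabled with, e.g.,
# `systemctl enable --now llamacuda`.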
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<'EOF' > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server (CUDA build)
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF
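
# Default arguments for the service, sourced via EnvironmentFile above.
# Adjust the model path and other flags as needed.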
mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF
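
# Remove the build root and the build directory once packaging is done.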
%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*
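
# Everything installed into the build root above must be listed here.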
%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama
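
# Install/uninstall scriptlets are left empty; nothing runs on install,
# upgrade, or removal.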
%pre

%post

%preun

%postun

%changelog