本项目只是自动拉取代码并构建上传二进制文件 原始仓库 https://github.com/ggml-org/llama.cpp
云开发环境手动执行构建
mkdir empty && cd empty && git clone https://github.com/ggml-org/llama.cpp .
cmake -B build -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
cmake --build build --config Release -j 16
手动上传文件到release
docker run --rm -e TZ=Asia/Shanghai -e CNB_TOKEN=$CNB_TOKEN -e CNB_API_ENDPOINT='https://api.cnb.cool' -e CNB_WEB_ENDPOINT='https://cnb.cool' -e CNB_REPO_SLUG='free_llm/llama.cpp-build' -e PLUGIN_ATTACHMENTS='./empty/build/bin/llama-server' -v $(pwd):$(pwd) -w $(pwd) cnbcool/attachments:latest
删除代码 rm -rf empty
turbo quant 构建：mkdir temp && cd temp && git clone https://github.com/TheTom/llama-cpp-turboquant.git && cd llama-cpp-turboquant && git checkout feature/turboquant-kv-cache && cmake -B build -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON && cmake --build build --config Release -j 16 && cp ./build/bin/llama-server /workspace/ && docker run --rm -e TZ=Asia/Shanghai -e CNB_TOKEN=$CNB_TOKEN -e CNB_API_ENDPOINT='https://api.cnb.cool' -e CNB_WEB_ENDPOINT='https://cnb.cool' -e CNB_REPO_SLUG='free_llm/llama.cpp-build' -e PLUGIN_ATTACHMENTS='./build/bin/llama-server' -v $(pwd):$(pwd) -w $(pwd) cnbcool/attachments:latest
参数建议：-ctk turbo3 -ctv turbo3，或 -ctk q8_0 -ctv turbo4
dflash 投机解码验证：mkdir empty && cd empty && git clone https://github.com/spiritbuun/buun-llama-cpp && cd buun-llama-cpp
cmake -B build -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DGGML_NATIVE=ON -DGGML_CUDA_FA=ON -DGGML_CUDA_FA_ALL_QUANTS=ON && cmake --build build --config Release -j 16 && docker run --rm -e TZ=Asia/Shanghai -e CNB_TOKEN=$CNB_TOKEN -e CNB_API_ENDPOINT='https://api.cnb.cool' -e CNB_WEB_ENDPOINT='https://cnb.cool' -e CNB_REPO_SLUG='free_llm/llama.cpp-build' -e PLUGIN_ATTACHMENTS='./build/bin/llama-server' -v $(pwd):$(pwd) -w $(pwd) cnbcool/attachments:latest