diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7f524f3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+# Source caches and build artefacts (gitignored). The pinned PINNED.json
+# is the only committed artefact; tarballs and extracted trees are
+# materialized on demand by fetch.sh / build.sh.
+*.tar.gz
+*.tar.xz
+src/
diff --git a/PINNED.json b/PINNED.json
new file mode 100644
index 0000000..6592707
--- /dev/null
+++ b/PINNED.json
@@ -0,0 +1,11 @@
+{
+  "schema": "cxos.vendor.llama-cpp/v1",
+  "version": "b3500",
+  "series": "ggml-llama.cpp",
+  "tarball_url": "https://github.com/ggerganov/llama.cpp/archive/refs/tags/b3500.tar.gz",
+  "signature_url": null,
+  "sha256": "0000000000000000000000000000000000000000000000000000000000000000",
+  "gpg_signing_keys": [],
+  "extracted_dir": "llama.cpp-b3500",
+  "notes": "CxLLM-Arch v1.0 GA inference engine. Update both 'version' and 'sha256' in the same commit. SHA-256 must be re-verified with 'sha256sum b3500.tar.gz' after each bump. The placeholder 0000... sha must be replaced with the real digest before fetch.sh will succeed; CI gating is intentional. Upstream tarballs are not GPG-signed by ggerganov; CxLLM keeps the array empty rather than spoofing trust."
+}
diff --git a/README.md b/README.md
index cbfcea1..e111071 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,31 @@
-# cxos-vendor-llama-cpp
+# cxos/vendor/llama-cpp/ — pinned llama.cpp / ggml inference engine
 
-CxOS vendor: llama-cpp
\ No newline at end of file
+CxLLM-Arch's Core inference backend embeds [llama.cpp](https://github.com/ggerganov/llama.cpp)
+through this vendor shim. We do **not** commit the multi-hundred-megabyte
+source tree; this directory holds:
+
+* **`PINNED.json`** — exact upstream tag, tarball URL, and SHA-256
+  CxLLM trusts. Bumping is a single-commit operation: update both
+  `version` and `sha256` together, ideally with a co-located CI run
+  that proves reproducibility.
+* **`fetch.sh`** — downloads the tarball, verifies SHA-256, and extracts
+  to `src/llama.cpp-<version>/` (gitignored). Refuses to run when
+  `PINNED.json` still has the placeholder all-zeros sha.
+* **`build.sh`** — invokes `cmake … --install` into
+  `dist/cxllm-arch/llama-cpp/`, with backend toggles via
+  `--backend {cpu,vulkan,cuda,hip,opencl}` (repeatable flag).
+
+Run via the top-level Makefile:
+
+```sh
+make cxos-vendor-llama                               # fetch + verify
+make cxos-vendor-llama-build                         # CPU only
+make cxos-vendor-llama-build BACKENDS="vulkan cuda"
+```
+
+CxLLM-Arch's `Core/CMakeLists.txt` consumes the install prefix produced
+here when `CXLLM_USE_LLAMA_CPP=ON` (default for production builds).
+
+Trust model: tarballs from upstream are not GPG-signed, so we anchor on
+the SHA-256 in PINNED.json. Bumps are reviewed and reproduced in CI
+before merging.
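The bump procedure the README and the PINNED.json notes describe amounts to a few shell commands. A minimal sketch follows, assuming a hypothetical future tag `b3600`; the tag name is illustrative, not a vetted release:

```sh
# Hypothetical pin bump; b3600 is an illustrative tag, not a vetted release.
NEW=b3600
curl -fsSL -o "llama.cpp-${NEW}.tar.gz" \
  "https://github.com/ggerganov/llama.cpp/archive/refs/tags/${NEW}.tar.gz"
sha256sum "llama.cpp-${NEW}.tar.gz"   # copy this digest into PINNED.json
# Edit PINNED.json so "version", "tarball_url", "extracted_dir", and
# "sha256" all change together, then land them in one reviewed commit.
```

Landing the digest and the version in the same commit is what lets CI reproduce the pin before merge.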
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..00f15c3
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,94 @@
+#!/usr/bin/env bash
+# cxos/vendor/llama-cpp/build.sh — build the pinned llama.cpp for CxLLM-Arch.
+# shellcheck disable=SC2012
+#
+# Prerequisites:
+#   * fetch.sh has run and verified the source under src/llama.cpp-<version>/
+#   * Host has cmake, ninja-build, gcc (and optional Vulkan/CUDA/HIP/OpenCL
+#     SDKs for the corresponding backends).
+#
+# Usage:
+#   build.sh                                  # CPU only
+#   build.sh --backend vulkan                 # +Vulkan
+#   build.sh --backend cuda                   # +CUDA
+#   build.sh --backend hip                    # +ROCm/HIP
+#   build.sh --backend opencl                 # +OpenCL
+#   build.sh --backend vulkan --backend cuda  # multiple
+#   build.sh --dry                            # print plan; no compile
+#
+# Outputs (relative to repo root):
+#   dist/cxllm-arch/llama-cpp/include/...
+#   dist/cxllm-arch/llama-cpp/lib/libllama.a
+#   dist/cxllm-arch/llama-cpp/lib/libggml.a
+set -euo pipefail
+
+BACKENDS=()
+DRY=0
+JOBS="${JOBS:-$(nproc 2>/dev/null || echo 2)}"
+
+while (( "$#" )); do
+  case "$1" in
+    --backend) BACKENDS+=("$2"); shift 2 ;;
+    --dry)     DRY=1; shift ;;
+    --jobs)    JOBS="$2"; shift 2 ;;
+    -h|--help)
+      sed -n '2,18p' "$0" | sed 's/^# \{0,1\}//'
+      exit 0 ;;
+    *) echo "build.sh: unknown arg: $1" >&2; exit 2 ;;
+  esac
+done
+
+HERE="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "$HERE/../../.." && pwd)"
+PINNED="$HERE/PINNED.json"
+
+VERSION="$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["version"])' "$PINNED")"
+EXTRACTED_DIR="$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["extracted_dir"])' "$PINNED")"
+SRC="$HERE/src/$EXTRACTED_DIR"
+OUT="$REPO_ROOT/dist/cxllm-arch/llama-cpp"
+
+CMAKE_FLAGS=(
+  -DCMAKE_BUILD_TYPE=Release
+  -DCMAKE_INSTALL_PREFIX="$OUT"
+  -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+  -DLLAMA_BUILD_TESTS=OFF
+  -DLLAMA_BUILD_EXAMPLES=OFF
+  -DLLAMA_BUILD_SERVER=OFF
+)
+
+for b in "${BACKENDS[@]:-}"; do
+  case "$b" in
+    cpu) ;;
+    vulkan)   CMAKE_FLAGS+=(-DGGML_VULKAN=ON) ;;
+    cuda)     CMAKE_FLAGS+=(-DGGML_CUDA=ON) ;;
+    hip|rocm) CMAKE_FLAGS+=(-DGGML_HIP=ON) ;;
+    opencl)   CMAKE_FLAGS+=(-DGGML_OPENCL=ON) ;;
+    "") ;;
+    *) echo "build.sh: unknown backend: $b" >&2; exit 2 ;;
+  esac
+done
+
+echo "==> llama.cpp ${VERSION}"
+echo "    src   : ${SRC}"
+echo "    out   : ${OUT}"
+echo "    flags : ${CMAKE_FLAGS[*]}"
+echo "    jobs  : ${JOBS}"
+
+if [[ "$DRY" == "1" ]]; then
+  echo "==> dry-run; not compiling"
+  exit 0
+fi
+
+if [[ ! -d "$SRC" ]]; then
+  echo "build.sh: source not extracted; run fetch.sh first" >&2
+  exit 1
+fi
+
+BUILD="$SRC/build"
+mkdir -p "$BUILD" "$OUT"
+
+cmake -S "$SRC" -B "$BUILD" -G Ninja "${CMAKE_FLAGS[@]}"
+cmake --build "$BUILD" --parallel "$JOBS"
+cmake --install "$BUILD"
+
+echo "==> built: $OUT/lib/$(ls "$OUT/lib" 2>/dev/null | head -1 || echo '')"
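Some typical invocations, matching the flag handling above (paths relative to the repo root):

```sh
cxos/vendor/llama-cpp/build.sh                                  # CPU-only static libs
cxos/vendor/llama-cpp/build.sh --backend vulkan --backend cuda  # multi-backend build
cxos/vendor/llama-cpp/build.sh --backend hip --dry              # print plan; no compile
JOBS=8 cxos/vendor/llama-cpp/build.sh                           # equivalent to --jobs 8
```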
-f "$PINNED" ]]; then + echo "fetch.sh: missing $PINNED" >&2 + exit 1 +fi + +read -r VERSION TARBALL_URL SHA256 EXTRACTED_DIR < <( + python3 - "$PINNED" <<'PY' +import json, sys +d = json.load(open(sys.argv[1])) +print(d["version"], d["tarball_url"], d["sha256"], d["extracted_dir"]) +PY +) + +TARBALL="$HERE/llama.cpp-${VERSION}.tar.gz" +SRC_DIR="$HERE/src" + +echo "==> llama.cpp ${VERSION}" +echo " tarball: ${TARBALL_URL}" +echo " sha256 : ${SHA256}" +echo " extract: ${SRC_DIR}/${EXTRACTED_DIR}" + +if [[ "$SHA256" =~ ^0+$ ]]; then + cat >&2 < dry-run; nothing fetched" + exit 0 +fi + +mkdir -p "$SRC_DIR" + +if [[ ! -f "$TARBALL" ]]; then + echo "==> downloading $(basename "$TARBALL")" + curl -fsSL --retry 3 -o "$TARBALL" "$TARBALL_URL" +fi + +echo "==> verifying sha256" +ACTUAL_SHA="$(sha256sum "$TARBALL" | awk '{print $1}')" +if [[ "$ACTUAL_SHA" != "$SHA256" ]]; then + echo "fetch.sh: SHA-256 mismatch" >&2 + echo " expected: $SHA256" >&2 + echo " actual : $ACTUAL_SHA" >&2 + rm -f "$TARBALL" + exit 1 +fi +echo " ok" + +if [[ ! -d "$SRC_DIR/$EXTRACTED_DIR" ]]; then + echo "==> extracting" + tar -xzf "$TARBALL" -C "$SRC_DIR" +fi + +echo "==> ready: $SRC_DIR/$EXTRACTED_DIR"