diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7f524f3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+# Source caches and build artefacts (gitignored). The pinned PINNED.json
+# is the only committed artefact; tarballs and extracted trees are
+# materialized on demand by fetch.sh / build.sh.
+*.tar.gz
+*.tar.xz
+src/
diff --git a/PINNED.json b/PINNED.json
new file mode 100644
index 0000000..6592707
--- /dev/null
+++ b/PINNED.json
@@ -0,0 +1,11 @@
+{
+  "schema": "cxos.vendor.llama-cpp/v1",
+  "version": "b3500",
+  "series": "ggml-llama.cpp",
+  "tarball_url": "https://github.com/ggerganov/llama.cpp/archive/refs/tags/b3500.tar.gz",
+  "signature_url": null,
+  "sha256": "0000000000000000000000000000000000000000000000000000000000000000",
+  "gpg_signing_keys": [],
+  "extracted_dir": "llama.cpp-b3500",
+  "notes": "CxLLM-Arch v1.0 GA inference engine. Update both 'version' and 'sha256' in the same commit. SHA-256 must be re-verified with 'sha256sum b3500.tar.gz' after each bump. The placeholder 0000... sha must be replaced with the real digest before fetch.sh will succeed; CI gating is intentional. Upstream tarballs are not GPG-signed by ggerganov; CxLLM keeps the array empty rather than spoofing trust."
+}
diff --git a/README.md b/README.md
index cbfcea1..e111071 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,31 @@
-# cxos-vendor-llama-cpp
+# cxos/vendor/llama-cpp/ — pinned llama.cpp / ggml inference engine
 
-CxOS vendor: llama-cpp
\ No newline at end of file
+CxLLM-Arch's Core inference backend embeds [llama.cpp](https://github.com/ggerganov/llama.cpp)
+through this vendor shim. We do **not** commit the multi-hundred-megabyte
+source tree; this directory holds:
+
+* **`PINNED.json`** — exact upstream tag, tarball URL, and SHA-256
+  CxLLM trusts. Bumping is a single-commit operation: update both
+  `version` and `sha256` together, ideally with a co-located CI run
+  that proves reproducibility.
+* **`fetch.sh`** — downloads the tarball, verifies SHA-256, and extracts
+  to `src/llama.cpp-<version>/` (gitignored). Refuses to run when
+  `PINNED.json` still has the placeholder all-zeros sha.
+* **`build.sh`** — invokes `cmake … --install` into
+  `dist/cxllm-arch/llama-cpp/`, with backend toggles via
+  `--backend {cpu,vulkan,cuda,hip,opencl}` (repeatable flag).
+
+Run via the top-level Makefile:
+
+```sh
+make cxos-vendor-llama                               # fetch + verify
+make cxos-vendor-llama-build                         # CPU only
+make cxos-vendor-llama-build BACKENDS="vulkan cuda"
+```
+
+CxLLM-Arch's `Core/CMakeLists.txt` consumes the install prefix produced
+here when `CXLLM_USE_LLAMA_CPP=ON` (default for production builds).
+
+Trust model: tarballs from upstream are not GPG-signed, so we anchor on
+the SHA-256 in PINNED.json. Bumps are reviewed and reproduced in CI
+before merging.
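The bump procedure the README and the PINNED.json notes describe amounts to a few shell commands. A minimal sketch follows, assuming a hypothetical future tag `b3600`; the tag name is illustrative, not a vetted release:

```sh
# Hypothetical pin bump; b3600 is an illustrative tag, not a vetted release.
NEW=b3600
curl -fsSL -o "llama.cpp-${NEW}.tar.gz" \
  "https://github.com/ggerganov/llama.cpp/archive/refs/tags/${NEW}.tar.gz"
sha256sum "llama.cpp-${NEW}.tar.gz"   # copy this digest into PINNED.json
# Edit PINNED.json so "version", "tarball_url", "extracted_dir", and
# "sha256" all change together, then land them in one reviewed commit.
```

Landing the digest and the version in the same commit is what lets CI reproduce the pin before merge.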
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..00f15c3
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,94 @@
+#!/usr/bin/env bash
+# cxos/vendor/llama-cpp/build.sh — build the pinned llama.cpp for CxLLM-Arch.
+# shellcheck disable=SC2012
+#
+# Prerequisites:
+#   * fetch.sh has run and verified the source under src/llama.cpp-<version>/
+#   * Host has cmake, ninja-build, gcc (and optional Vulkan/CUDA/HIP/OpenCL
+#     SDKs for the corresponding backends).
+#
+# Usage:
+#   build.sh                                  # CPU only
+#   build.sh --backend vulkan                 # +Vulkan
+#   build.sh --backend cuda                   # +CUDA
+#   build.sh --backend hip                    # +ROCm/HIP
+#   build.sh --backend opencl                 # +OpenCL
+#   build.sh --backend vulkan --backend cuda  # multiple
+#   build.sh --dry                            # print plan; no compile
+#
+# Outputs (relative to repo root):
+#   dist/cxllm-arch/llama-cpp/include/...
+#   dist/cxllm-arch/llama-cpp/lib/libllama.a
+#   dist/cxllm-arch/llama-cpp/lib/libggml.a
+set -euo pipefail
+
+BACKENDS=()
+DRY=0
+JOBS="${JOBS:-$(nproc 2>/dev/null || echo 2)}"
+
+while (( "$#" )); do
+  case "$1" in
+    --backend) BACKENDS+=("$2"); shift 2 ;;
+    --dry)     DRY=1; shift ;;
+    --jobs)    JOBS="$2"; shift 2 ;;
+    -h|--help)
+      sed -n '2,18p' "$0" | sed 's/^# \{0,1\}//'
+      exit 0 ;;
+    *) echo "build.sh: unknown arg: $1" >&2; exit 2 ;;
+  esac
+done
+
+HERE="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "$HERE/../../.." && pwd)"
+PINNED="$HERE/PINNED.json"
+
+VERSION="$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["version"])' "$PINNED")"
+EXTRACTED_DIR="$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1]))["extracted_dir"])' "$PINNED")"
+SRC="$HERE/src/$EXTRACTED_DIR"
+OUT="$REPO_ROOT/dist/cxllm-arch/llama-cpp"
+
+CMAKE_FLAGS=(
+  -DCMAKE_BUILD_TYPE=Release
+  -DCMAKE_INSTALL_PREFIX="$OUT"
+  -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+  -DLLAMA_BUILD_TESTS=OFF
+  -DLLAMA_BUILD_EXAMPLES=OFF
+  -DLLAMA_BUILD_SERVER=OFF
+)
+
+for b in "${BACKENDS[@]:-}"; do
+  case "$b" in
+    cpu) ;;
+    vulkan)   CMAKE_FLAGS+=(-DGGML_VULKAN=ON) ;;
+    cuda)     CMAKE_FLAGS+=(-DGGML_CUDA=ON) ;;
+    hip|rocm) CMAKE_FLAGS+=(-DGGML_HIP=ON) ;;
+    opencl)   CMAKE_FLAGS+=(-DGGML_OPENCL=ON) ;;
+    "") ;;
+    *) echo "build.sh: unknown backend: $b" >&2; exit 2 ;;
+  esac
+done
+
+echo "==> llama.cpp ${VERSION}"
+echo "    src   : ${SRC}"
+echo "    out   : ${OUT}"
+echo "    flags : ${CMAKE_FLAGS[*]}"
+echo "    jobs  : ${JOBS}"
+
+if [[ "$DRY" == "1" ]]; then
+  echo "==> dry-run; not compiling"
+  exit 0
+fi
+
+if [[ ! -d "$SRC" ]]; then
+  echo "build.sh: source not extracted; run fetch.sh first" >&2
+  exit 1
+fi
+
+BUILD="$SRC/build"
+mkdir -p "$BUILD" "$OUT"
+
+cmake -S "$SRC" -B "$BUILD" -G Ninja "${CMAKE_FLAGS[@]}"
+cmake --build "$BUILD" --parallel "$JOBS"
+cmake --install "$BUILD"
+
+echo "==> built: $OUT/lib/$(ls "$OUT/lib" 2>/dev/null | head -1 || echo '')"
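Some typical invocations, matching the flag handling above (paths relative to the repo root):

```sh
cxos/vendor/llama-cpp/build.sh                                  # CPU-only static libs
cxos/vendor/llama-cpp/build.sh --backend vulkan --backend cuda  # multi-backend build
cxos/vendor/llama-cpp/build.sh --backend hip --dry              # print plan; no compile
JOBS=8 cxos/vendor/llama-cpp/build.sh                           # equivalent to --jobs 8
```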
-f "$PINNED" ]]; then + echo "fetch.sh: missing $PINNED" >&2 + exit 1 +fi + +read -r VERSION TARBALL_URL SHA256 EXTRACTED_DIR < <( + python3 - "$PINNED" <<'PY' +import json, sys +d = json.load(open(sys.argv[1])) +print(d["version"], d["tarball_url"], d["sha256"], d["extracted_dir"]) +PY +) + +TARBALL="$HERE/llama.cpp-${VERSION}.tar.gz" +SRC_DIR="$HERE/src" + +echo "==> llama.cpp ${VERSION}" +echo " tarball: ${TARBALL_URL}" +echo " sha256 : ${SHA256}" +echo " extract: ${SRC_DIR}/${EXTRACTED_DIR}" + +if [[ "$SHA256" =~ ^0+$ ]]; then + cat >&2 < dry-run; nothing fetched" + exit 0 +fi + +mkdir -p "$SRC_DIR" + +if [[ ! -f "$TARBALL" ]]; then + echo "==> downloading $(basename "$TARBALL")" + curl -fsSL --retry 3 -o "$TARBALL" "$TARBALL_URL" +fi + +echo "==> verifying sha256" +ACTUAL_SHA="$(sha256sum "$TARBALL" | awk '{print $1}')" +if [[ "$ACTUAL_SHA" != "$SHA256" ]]; then + echo "fetch.sh: SHA-256 mismatch" >&2 + echo " expected: $SHA256" >&2 + echo " actual : $ACTUAL_SHA" >&2 + rm -f "$TARBALL" + exit 1 +fi +echo " ok" + +if [[ ! -d "$SRC_DIR/$EXTRACTED_DIR" ]]; then + echo "==> extracting" + tar -xzf "$TARBALL" -C "$SRC_DIR" +fi + +echo "==> ready: $SRC_DIR/$EXTRACTED_DIR"