From eadf596f18499b557890ec013a108a1837c6bbb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ralph=20K=C3=BCpper?= <ralph.kuepper@skelpo.com>
Date: Fri, 1 May 2026 10:46:38 +0200
Subject: [PATCH 1/6] feat(renderer): on-disk cache for per-mesh SDFs (closes
 #22)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cold launches re-baked every per-mesh 32³ R32Float SDF from scratch
even though the same content always produces the same voxel data.
Sponza's first 9 frames spent ~8 bakes/frame on this path, and a second
launch spent another 9 frames redoing the identical work.

This change content-hashes (positions + indices) at GPU upload time,
checks a platform-appropriate cache directory, and `queue.write_texture`s
the cached voxel bytes directly when the file exists — bypassing the
GPU dispatch entirely. Misses fall through to the existing bake; the
renderer encodes a copy_texture_to_buffer alongside each dispatch
and persists the readback to disk after the frame's main submit.
The next launch hits and skips the bake.

The cache is best-effort throughout: a corrupt entry, missing dir,
or write failure silently re-bakes. wasm32 has no filesystem path so
load returns None and store is gated out — web builds bake every
launch as before.

Cache layout:
- macOS / iOS / tvOS / watchOS: ~/Library/Caches/bloom/sdf
- Linux / Android:              $XDG_CACHE_HOME/bloom/sdf
- Windows:                      %LOCALAPPDATA%\bloom\cache\sdf
- 16 B header (magic + version + voxel_res) + 128 KB R32Float payload

Sponza disk footprint: 68 × 128 KB = 8.7 MB (matches the issue's
budget). Disk reads happen synchronously at upload — a 128 KB read
from local cache is sub-millisecond.

The flush calls device.poll(Wait) synchronously, blocking until the bake
submission has finished before anything is persisted; this is a
cold-launch-only stall (~9 frames) and the bake itself is the bottleneck
on those frames anyway. Async pipelining is a follow-up if the
cold-launch stall ever shows up in profiles.

8 new unit tests cover hash stability, change-detection on positions
and indices, count-vs-value distinguishability, store/load round-trip,
miss handling, size validation, and bad-magic rejection. cargo test
74/0 (was 66/0) on macOS, wasm32 cargo check clean.
---
 native/shared/src/lib.rs          |   1 +
 native/shared/src/renderer/mod.rs | 128 +++++++++++++-
 native/shared/src/scene.rs        |  69 +++++++-
 native/shared/src/sdf_cache.rs    | 274 ++++++++++++++++++++++++++++++
 4 files changed, 463 insertions(+), 9 deletions(-)
 create mode 100644 native/shared/src/sdf_cache.rs

diff --git a/native/shared/src/lib.rs b/native/shared/src/lib.rs
index 541b461..940c8c8 100644
--- a/native/shared/src/lib.rs
+++ b/native/shared/src/lib.rs
@@ -20,6 +20,7 @@ pub mod postfx;
 pub mod custom_shaders;
 pub mod staging;
 pub mod profiler;
+pub mod sdf_cache;
 // Jolt C ABI + Rust wrapper live on native only. On wasm32 the web crate
 // routes bloom_physics_* calls through wasm_bindgen to JoltPhysics.js;
 // no Rust-side Jolt integration is needed.
diff --git a/native/shared/src/renderer/mod.rs b/native/shared/src/renderer/mod.rs
index 5d20519..d0ae9de 100644
--- a/native/shared/src/renderer/mod.rs
+++ b/native/shared/src/renderer/mod.rs
@@ -1055,6 +1055,13 @@ pub struct Renderer {
     pub sdf_bake_pipeline: wgpu::ComputePipeline,
     pub sdf_bake_layout: wgpu::BindGroupLayout,
     pub sdf_bake_uniform: wgpu::Buffer,
+    /// Ticket 022 — staging buffers awaiting readback so freshly-baked
+    /// per-mesh SDFs can be written to the disk cache. Populated by
+    /// `bake_pending_sdfs` (one entry per dispatch); drained by
+    /// `flush_sdf_cache_writes` after the frame's main submit, which
+    /// maps each buffer, copies bytes to the cache file, and drops it.
+    /// Empty on cache-hit frames and after cold launch finishes.
+    sdf_cache_writes: Vec<(crate::sdf_cache::MeshHash, wgpu::Buffer)>,
 
     // --- Ticket 014 V2: scene-wide SDF clipmap ---
     pub scene_sdf_clipmap_tex: wgpu::Texture,
@@ -6040,6 +6047,7 @@ impl Renderer {
             sdf_bake_pipeline,
             sdf_bake_layout,
             sdf_bake_uniform,
+            sdf_cache_writes: Vec::new(),
             scene_sdf_clipmap_tex,
             scene_sdf_clipmap_view,
             scene_sdf_clipmap_built: false,
@@ -7183,6 +7191,13 @@ impl Renderer {
     /// per-frame budget; expensive workload (O(voxels × triangles)
     /// per mesh), so the rate-limit keeps first-frame stutter
     /// bounded. Static scenes amortise and never re-bake.
+    ///
+    /// Ticket 022 — after each dispatch, encode a copy_texture_to_buffer
+    /// against a fresh staging buffer and stash (hash, buffer) on
+    /// `sdf_cache_writes`. The frame's main submit picks up the copies
+    /// alongside the bake; `flush_sdf_cache_writes` then maps and
+    /// persists each buffer to the on-disk cache so the next launch
+    /// hits the load path in scene.rs and skips the bake entirely.
     fn bake_pending_sdfs(
         &mut self,
         scene: &mut crate::scene::SceneGraph,
@@ -7196,18 +7211,21 @@ impl Renderer {
         let pending: Vec<f64> = scene.pending_sdf_bakes.drain(..take).collect();
 
         for handle in pending {
-            let (sdf_view, vb_ptr, ib_ptr, bmin, bmax, index_count) = {
+            let (sdf_tex, sdf_view, vb_ptr, ib_ptr, bmin, bmax, index_count, mesh_hash) = {
                 let Some(node) = scene.nodes.get(handle) else { continue; };
+                let Some(sdf_tex) = node.mesh_sdf.as_ref() else { continue; };
                 let Some(sdf_view) = node.mesh_sdf_view.as_ref() else { continue; };
                 let Some(vb) = node.gpu_vb.as_ref() else { continue; };
                 let Some(ib) = node.gpu_ib.as_ref() else { continue; };
                 (
+                    sdf_tex.clone(),
                     sdf_view.clone(),
                     vb.clone(),
                     ib.clone(),
                     node.bounds_min,
                     node.bounds_max,
                     node.gpu_index_count,
+                    node.mesh_hash,
                 )
             };
             if index_count == 0 {
@@ -7234,13 +7252,95 @@ impl Renderer {
                     wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&sdf_view) },
                 ],
             });
-            let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
-                label: Some("sdf_bake_pass"),
-                timestamp_writes: None,
-            });
-            pass.set_pipeline(&self.sdf_bake_pipeline);
-            pass.set_bind_group(0, &bg, &[]);
-            pass.dispatch_workgroups(MESH_SDF_RES / 4, MESH_SDF_RES / 4, MESH_SDF_RES / 4);
+            {
+                let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                    label: Some("sdf_bake_pass"),
+                    timestamp_writes: None,
+                });
+                pass.set_pipeline(&self.sdf_bake_pipeline);
+                pass.set_bind_group(0, &bg, &[]);
+                pass.dispatch_workgroups(MESH_SDF_RES / 4, MESH_SDF_RES / 4, MESH_SDF_RES / 4);
+            }
+
+            // Ticket 022 — schedule a readback of the freshly-baked texture
+            // so the next launch can skip the bake. Skipped when scene.rs
+            // recorded no hash, and on wasm32 where the flush is compiled
+            // out and queued staging buffers would otherwise pile up. Rows
+            // are padded to wgpu's 256-byte COPY_BYTES_PER_ROW_ALIGNMENT.
+            if let Some(hash) = mesh_hash.filter(|_| cfg!(not(target_arch = "wasm32"))) {
+                let row_padded = ((MESH_SDF_RES * 4 + 255) & !255) as u64;
+                let staging = self.device.create_buffer(&wgpu::BufferDescriptor {
+                    label: Some("sdf_cache_readback"),
+                    size: row_padded * (MESH_SDF_RES as u64) * (MESH_SDF_RES as u64),
+                    usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+                    mapped_at_creation: false,
+                });
+                encoder.copy_texture_to_buffer(
+                    wgpu::TexelCopyTextureInfo {
+                        texture: &sdf_tex,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d::ZERO,
+                        aspect: wgpu::TextureAspect::All,
+                    },
+                    wgpu::TexelCopyBufferInfo {
+                        buffer: &staging,
+                        layout: wgpu::TexelCopyBufferLayout {
+                            offset: 0,
+                            bytes_per_row: Some(row_padded as u32),
+                            rows_per_image: Some(MESH_SDF_RES),
+                        },
+                    },
+                    wgpu::Extent3d {
+                        width: MESH_SDF_RES,
+                        height: MESH_SDF_RES,
+                        depth_or_array_layers: MESH_SDF_RES,
+                    },
+                );
+                self.sdf_cache_writes.push((hash, staging));
+            }
+        }
+    }
+
+    /// Ticket 022 — drain pending SDF cache writes after the frame's
+    /// main submit. Maps every staging buffer behind a single
+    /// `device.poll(Wait)`, strips the row padding back to the tight
+    /// on-disk layout, and stores each payload. Best-effort throughout:
+    /// a buffer whose map failed or never completed is dropped unread
+    /// and a write failure is ignored — the next cold launch rebakes.
+    pub fn flush_sdf_cache_writes(&mut self) {
+        if self.sdf_cache_writes.is_empty() { return; }
+        let entries = std::mem::take(&mut self.sdf_cache_writes);
+
+        // Issue map_async on every buffer up front so one poll resolves
+        // them all; each callback reports its outcome over a channel
+        // because `get_mapped_range` panics on an unmapped buffer.
+        let (tx, rx) = std::sync::mpsc::channel::<(usize, bool)>();
+        for (i, (_, buf)) in entries.iter().enumerate() {
+            let tx = tx.clone();
+            buf.slice(..).map_async(wgpu::MapMode::Read, move |r| { let _ = tx.send((i, r.is_ok())); });
+        }
+        let _ = self.device.poll(wgpu::PollType::Wait { submission_index: None, timeout: None });
+        let mut mapped = vec![false; entries.len()];
+        for (i, ok) in rx.try_iter() { mapped[i] = ok; }
+
+        let row_tight = (MESH_SDF_RES * 4) as usize;
+        let row_padded = ((MESH_SDF_RES * 4 + 255) & !255) as usize;
+        let rows = (MESH_SDF_RES as usize) * (MESH_SDF_RES as usize);
+
+        for ((hash, buf), ok) in entries.into_iter().zip(mapped) {
+            if !ok { continue; }
+            let data = buf.slice(..).get_mapped_range();
+            // Strip the wgpu-required row padding back to a tight
+            // 32³ × 4-byte payload before storing.
+            let mut tight = Vec::with_capacity(rows * row_tight);
+            for row in data.chunks_exact(row_padded).take(rows) {
+                tight.extend_from_slice(&row[..row_tight]);
+            }
+            drop(data);
+            buf.unmap();
+            let _ = crate::sdf_cache::store(hash, &tight);
+            // Failed stores are deliberately ignored (best-effort cache).
+            // `buf` drops here, releasing the staging allocation.
         }
     }
 
@@ -11363,6 +11463,18 @@ impl Renderer {
             profiler.end("queue_submit");
         }
 
+        // Ticket 022 — drain freshly-baked SDFs to the on-disk cache.
+        // No-op on cache-hit frames (queue is empty); on cold-launch
+        // bake frames it blocks briefly on a single device.poll(Wait)
+        // covering all 8 readbacks. Skipped on wasm32 (no filesystem
+        // path, sdf_cache::store returns Err immediately).
+        #[cfg(not(target_arch = "wasm32"))]
+        {
+            profiler.begin("sdf_cache_write");
+            self.flush_sdf_cache_writes();
+            profiler.end("sdf_cache_write");
+        }
+
         #[cfg(target_arch = "wasm32")]
         {
             self.queue.submit(std::iter::once(encoder.finish()));
diff --git a/native/shared/src/scene.rs b/native/shared/src/scene.rs
index d9d6d73..5791d46 100644
--- a/native/shared/src/scene.rs
+++ b/native/shared/src/scene.rs
@@ -128,6 +128,12 @@ pub struct SceneNode {
     /// `None` on non-RT-capable adapters or until the bake lands.
     pub mesh_sdf: Option<wgpu::Texture>,
     pub mesh_sdf_view: Option<wgpu::TextureView>,
+    /// Content hash of (positions, indices) computed at upload time.
+    /// Set whenever `mesh_sdf` exists; the renderer reads it when it
+    /// bakes the SDF, to key the cache write that follows. `None` until
+    /// the first geo upload — and on non-RT-capable adapters that never
+    /// allocate a per-mesh SDF.
+    pub mesh_hash: Option<crate::sdf_cache::MeshHash>,
     /// Flat mesh-average world-space normal, cached on BLAS build so
     /// the per-instance GI data buffer can be populated without
     /// re-reading the vertex array. Rough heuristic — for walls and
@@ -185,6 +191,7 @@ impl SceneNode {
             card_dynamic: false,
             mesh_sdf: None,
             mesh_sdf_view: None,
+            mesh_hash: None,
             flat_normal_ws: [0.0, 1.0, 0.0],
             flat_albedo: [1.0, 1.0, 1.0],
             uniform_slot: None,
@@ -752,9 +759,69 @@ impl SceneGraph {
                                 device,
                                 "scene_node_sdf",
                             );
+
+                            // Ticket 022 — content-hash the geometry and
+                            // try the on-disk SDF cache before scheduling
+                            // a GPU bake. Vertex layout is interleaved;
+                            // pull the position prefix out as a
+                            // [[f32; 3]] slice so the hash only sees
+                            // geometry-relevant bytes.
+                            let positions: Vec<[f32; 3]> =
+                                node.vertices.iter().map(|v| v.position).collect();
+                            let hash = crate::sdf_cache::compute_mesh_hash(
+                                &positions, &node.indices,
+                            );
+                            node.mesh_hash = Some(hash);
+
+                            if let Some(bytes) = crate::sdf_cache::load(hash) {
+                                // Cache hit — pad the 128 B rows to 256 B
+                                // (wgpu's COPY_BYTES_PER_ROW_ALIGNMENT),
+                                // upload directly and skip the bake. Disk
+                                // entries stay tight (128 KB/mesh); the
+                                // 256 KB padded copy is freed when this
+                                // branch ends. NOTE(review): wgpu docs say
+                                // Queue::write_texture is exempt from that
+                                // alignment, so the padding may be droppable.
+                                const RES: u32 = crate::sdf_cache::VOXEL_RES;
+                                let row_tight = (RES * 4) as usize;
+                                let row_padded = ((row_tight + 255) & !255) as u32;
+                                let mut padded = vec![
+                                    0u8;
+                                    (row_padded as usize) * (RES as usize) * (RES as usize)
+                                ];
+                                for z in 0..RES as usize {
+                                    for y in 0..RES as usize {
+                                        let src_off = (z * RES as usize + y) * row_tight;
+                                        let dst_off = (z * RES as usize + y) * row_padded as usize;
+                                        padded[dst_off..dst_off + row_tight]
+                                            .copy_from_slice(&bytes[src_off..src_off + row_tight]);
+                                    }
+                                }
+                                queue.write_texture(
+                                    wgpu::TexelCopyTextureInfo {
+                                        texture: &sdf_tex,
+                                        mip_level: 0,
+                                        origin: wgpu::Origin3d::ZERO,
+                                        aspect: wgpu::TextureAspect::All,
+                                    },
+                                    &padded,
+                                    wgpu::TexelCopyBufferLayout {
+                                        offset: 0,
+                                        bytes_per_row: Some(row_padded),
+                                        rows_per_image: Some(RES),
+                                    },
+                                    wgpu::Extent3d {
+                                        width: RES,
+                                        height: RES,
+                                        depth_or_array_layers: RES,
+                                    },
+                                );
+                            } else {
+                                pending_sdf.push(handle);
+                            }
+
                             node.mesh_sdf = Some(sdf_tex);
                             node.mesh_sdf_view = Some(sdf_view);
-                            pending_sdf.push(handle);
                         }
                     }
                 }
diff --git a/native/shared/src/sdf_cache.rs b/native/shared/src/sdf_cache.rs
new file mode 100644
index 0000000..3fd7332
--- /dev/null
+++ b/native/shared/src/sdf_cache.rs
@@ -0,0 +1,274 @@
+//! Disk cache for per-mesh signed distance fields baked by ticket 014.
+//!
+//! Each mesh content-hashes (positions + indices) to a 64-bit key; the
+//! 32³ R32Float voxel data (~128 KB) lives under the platform cache
+//! directory. Cold launches that hit the cache skip the GPU
+//! brute-force point-triangle bake entirely and `queue.write_texture`
+//! the bytes directly. Misses fall through to the existing in-process
+//! bake; the renderer reads the texture back on the same frame and
+//! writes the cache entry so the next launch hits.
+//!
+//! Caching is best-effort by design — every fallible operation
+//! (hashing aside) returns `Option`/`Result` and the renderer treats
+//! an error or `None` as "no cache, just bake."
+//!
+//! Web isn't supported here. The wasm32 build needs IndexedDB plumbing
+//! before it can store anything; until then `cache_dir()` returns
+//! `None` on wasm and the bake falls through normally.
+
+use std::fs;
+use std::io::{Read, Write};
+use std::path::PathBuf;
+
+/// File header written before the raw R32Float voxel bytes. Fixed
+/// 16 bytes so a future version can extend without breaking layout
+/// readers (the `version` byte is the gate).
+const FILE_MAGIC: [u8; 6] = *b"BLSDF\0";
+const FILE_VERSION: u8 = 1;
+
+/// Voxel resolution we bake at. Mirrors `renderer::formats::MESH_SDF_RES`.
+/// Hardcoded here rather than imported to keep the cache module
+/// dependency-free of the renderer.
+pub const VOXEL_RES: u32 = 32;
+
+/// Total payload size: 32³ × f32. Exposed for callers sizing a staging
+/// buffer or a `queue.write_texture` source slice.
+pub const VOXEL_BYTES: usize = (VOXEL_RES as usize).pow(3) * 4;
+
+/// Content hash of a mesh's geometry. Stable across Rust versions
+/// because the underlying mix is FNV-1a over fixed little-endian bytes;
+/// renaming the type or adding fields is fine, just don't change the
+/// hash math without bumping `FILE_VERSION`.
+#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
+pub struct MeshHash(pub u64);
+
+impl MeshHash {
+    /// Cache file name for this hash: 16 lowercase hex digits + `.sdf`.
+    fn to_filename(self) -> String {
+        format!("{:016x}.sdf", self.0)
+    }
+}
+
+/// FNV-1a 64-bit offset basis (the hash of the empty input).
+const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
+/// FNV-1a 64-bit prime.
+const FNV_PRIME: u64 = 0x100000001b3;
+
+/// Continue an FNV-1a 64-bit hash from state `h` over `input`.
+/// Algorithmically frozen — changes here invalidate every existing
+/// cache entry without warning.
+fn fnv1a_extend(h: u64, input: &[u8]) -> u64 {
+    input.iter().fold(h, |h, &b| (h ^ b as u64).wrapping_mul(FNV_PRIME))
+}
+
+/// FNV-1a 64-bit of `input`, starting from the standard offset basis.
+/// One-shot form of `fnv1a_extend`.
+fn fnv1a(input: &[u8]) -> u64 {
+    fnv1a_extend(FNV_OFFSET_BASIS, input)
+}
+
+/// Compute a stable 64-bit content hash for the SDF input. Only
+/// position bits and indices feed in — meshes that share a surface
+/// but differ in normals/uv/colour share a cache entry, which is
+/// correct: SDF is geometry-only.
+///
+/// `positions` is the contiguous `[f32; 3]` slice (one entry per
+/// vertex). Callers with an interleaved vertex stride extract just
+/// the position component before calling.
+///
+/// The byte stream hashed is: every position component's IEEE-754
+/// bits (little-endian), every index (little-endian), then the
+/// vertex count and index count as little-endian `u64`s.
+pub fn compute_mesh_hash(positions: &[[f32; 3]], indices: &[u32]) -> MeshHash {
+    // Hashing the empty input yields the offset basis, i.e. the seed.
+    let mut h = fnv1a(&[]);
+    // Raw bit patterns, not float values: 0.0 and -0.0 hash
+    // differently, exactly as their stored bytes differ.
+    for c in positions.iter().flatten() {
+        h = fnv1a_extend(h, &c.to_bits().to_le_bytes());
+    }
+    for i in indices {
+        h = fnv1a_extend(h, &i.to_le_bytes());
+    }
+    // Fold the vertex/index counts in so two meshes that share a
+    // prefix can't accidentally collide via positions-as-suffix.
+    h = fnv1a_extend(h, &(positions.len() as u64).to_le_bytes());
+    h = fnv1a_extend(h, &(indices.len() as u64).to_le_bytes());
+    MeshHash(h)
+}
+
+/// Platform cache root. Returns `None` when the host has no usable
+/// cache directory (wasm) or when the env vars used to derive the
+/// path aren't set. A relative or empty `XDG_CACHE_HOME` is ignored.
+///
+/// Resolution order:
+///   - macOS / iOS / tvOS / watchOS: `$HOME/Library/Caches/bloom/sdf`
+///   - Linux / Android:              `${XDG_CACHE_HOME:-$HOME/.cache}/bloom/sdf`
+///   - Windows:                      `%LOCALAPPDATA%\bloom\cache\sdf`
+///   - wasm32:                       `None`
+pub fn cache_dir() -> Option<PathBuf> {
+    #[cfg(target_arch = "wasm32")]
+    { return None; }
+
+    #[cfg(not(target_arch = "wasm32"))]
+    {
+        let dir = if cfg!(target_vendor = "apple") {
+            let home = std::env::var_os("HOME")?;
+            PathBuf::from(home).join("Library").join("Caches").join("bloom").join("sdf")
+        } else if cfg!(target_os = "windows") {
+            let local = std::env::var_os("LOCALAPPDATA")?;
+            PathBuf::from(local).join("bloom").join("cache").join("sdf")
+        } else {
+            // Linux + Android (XDG-style); the spec treats non-absolute values as invalid.
+            let base = std::env::var_os("XDG_CACHE_HOME")
+                .map(PathBuf::from).filter(|p| p.is_absolute())
+                .or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join(".cache")))?;
+            base.join("bloom").join("sdf")
+        };
+        Some(dir)
+    }
+}
+
+/// Resolve a hash to its cache file path, creating the cache root if needed.
+/// Returns `None` when the platform has no cache dir or it can't be created.
+fn cache_path(hash: MeshHash) -> Option<PathBuf> {
+    let dir = cache_dir()?;
+    if !dir.exists() {
+        fs::create_dir_all(&dir).ok()?;
+    }
+    Some(dir.join(hash.to_filename()))
+}
+
+/// Look up cached voxel bytes. Returns `Some(bytes)` only when the
+/// file exists, parses, and matches the expected magic + version +
+/// resolution + payload size. Any failure is silently treated as a
+/// miss — the caller falls through to the GPU bake.
+pub fn load(hash: MeshHash) -> Option<Vec<u8>> {
+    let path = cache_path(hash)?;
+    let mut f = fs::File::open(&path).ok()?;
+
+    let mut header = [0u8; 16];
+    f.read_exact(&mut header).ok()?;
+    if header[..6] != FILE_MAGIC { return None; }
+    if header[6] != FILE_VERSION { return None; }
+    // header[7] reserved (alignment pad / future flags).
+    let res = u32::from_le_bytes(header[8..12].try_into().ok()?);
+    if res != VOXEL_RES { return None; }
+    // header[12..16] reserved. The payload read is capped one byte past
+    // VOXEL_BYTES so an oversized file is rejected without being slurped.
+    let mut bytes = Vec::with_capacity(VOXEL_BYTES + 1);
+    f.take(VOXEL_BYTES as u64 + 1).read_to_end(&mut bytes).ok()?;
+    if bytes.len() != VOXEL_BYTES { return None; }
+    Some(bytes)
+}
+
+/// Write voxel bytes for a mesh hash. Best-effort: on `Err` the cache is
+/// unchanged, the temp file is cleaned up, and rendering can continue.
+pub fn store(hash: MeshHash, voxel_bytes: &[u8]) -> std::io::Result<()> {
+    if voxel_bytes.len() != VOXEL_BYTES {
+        return Err(std::io::Error::new(
+            std::io::ErrorKind::InvalidInput,
+            "voxel payload size mismatch",
+        ));
+    }
+    let path = cache_path(hash).ok_or_else(|| {
+        std::io::Error::new(std::io::ErrorKind::Other, "cache directory unavailable")
+    })?;
+
+    // Write to a temp file and rename so a crash mid-write can never
+    // leave a partial entry that survives validation.
+    let tmp = path.with_extension("sdf.tmp");
+    let written = (|| -> std::io::Result<()> {
+        let mut f = fs::File::create(&tmp)?;
+        let mut header = [0u8; 16];
+        header[..6].copy_from_slice(&FILE_MAGIC);
+        header[6] = FILE_VERSION; // header[7] reserved.
+        header[8..12].copy_from_slice(&VOXEL_RES.to_le_bytes()); // header[12..16] reserved.
+        f.write_all(&header)?;
+        f.write_all(voxel_bytes)?;
+        f.sync_data()?;
+        drop(f); // close the handle before the rename, as before
+        fs::rename(&tmp, &path)
+    })();
+    if written.is_err() { let _ = fs::remove_file(&tmp); } // don't orphan the temp file
+    written
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn hash_is_stable_for_identical_input() {
+        let pos = vec![[0.0_f32, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]];
+        let idx = vec![0_u32, 1, 2];
+        assert_eq!(compute_mesh_hash(&pos, &idx), compute_mesh_hash(&pos, &idx));
+    }
+
+    #[test]
+    fn hash_changes_when_position_changes() {
+        let pos1 = vec![[0.0_f32, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]];
+        let pos2 = vec![[0.0_f32, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.001, 0.0]];
+        let idx = vec![0_u32, 1, 2];
+        assert_ne!(compute_mesh_hash(&pos1, &idx), compute_mesh_hash(&pos2, &idx));
+    }
+
+    #[test]
+    fn hash_changes_when_index_changes() {
+        let pos = vec![[0.0_f32; 3]; 3];
+        let idx1 = vec![0_u32, 1, 2];
+        let idx2 = vec![0_u32, 2, 1];
+        assert_ne!(compute_mesh_hash(&pos, &idx1), compute_mesh_hash(&pos, &idx2));
+    }
+
+    #[test]
+    fn hash_distinguishes_count_from_value() {
+        // An empty mesh must hash differently from one holding a single
+        // all-zero vertex and a single zero index: zero data still counts.
+        let h_empty = compute_mesh_hash(&[], &[]);
+        let h_one = compute_mesh_hash(&[[0.0_f32; 3]], &[0_u32]);
+        assert_ne!(h_empty, h_one);
+    }
+
+    #[test]
+    fn store_then_load_roundtrips() {
+        // Skip when the env doesn't expose a cache dir (CI sandbox can do this).
+        let Some(_) = cache_dir() else { return; };
+        // Use a hash unlikely to collide with anything else's tests.
+        let h = MeshHash(0xfeed_cafe_dead_beef);
+        let bytes: Vec<u8> = (0..VOXEL_BYTES).map(|i| (i * 7 + 13) as u8).collect();
+        store(h, &bytes).expect("store");
+        let got = load(h).expect("load hit");
+        assert_eq!(got, bytes);
+        // Cleanup so the test is repeatable.
+        if let Some(p) = cache_path(h) { let _ = fs::remove_file(p); }
+    }
+
+    #[test]
+    fn load_miss_returns_none() {
+        let Some(_) = cache_dir() else { return; };
+        let h = MeshHash(0x0000_0000_dead_dead);
+        if let Some(p) = cache_path(h) { let _ = fs::remove_file(p); }
+        assert!(load(h).is_none());
+    }
+
+    #[test]
+    fn store_rejects_wrong_size() {
+        let h = MeshHash(0);
+        assert!(store(h, &[0u8; 100]).is_err());
+    }
+
+    #[test]
+    fn load_rejects_wrong_magic() {
+        let Some(dir) = cache_dir() else { return; };
+        let _ = fs::create_dir_all(&dir);
+        let h = MeshHash(0xbad_0_bad_1);
+        let p = dir.join(h.to_filename());
+        // Hand-write a file with the wrong magic.
+        let mut bad = vec![0u8; 16 + VOXEL_BYTES];
+        bad[..6].copy_from_slice(b"NOTBLM");
+        std::fs::write(&p, &bad).unwrap();
+        assert!(load(h).is_none());
+        let _ = fs::remove_file(p);
+    }
+}

From 7edab877ae83d83f3dd3591dbb59cb457d65af65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ralph=20K=C3=BCpper?= <ralph.kuepper@skelpo.com>
Date: Sat, 16 May 2026 11:10:41 +0200
Subject: [PATCH 2/6] chore(pkg): prep @bloomengine/engine for npm publish
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename from bloom to @bloomengine/engine (Perry FFI module ref
updated to match) and add a files: allowlist so the tarball ships
just TS sources, Rust crates, shaders/assets, the bloom_jolt shim,
and JoltPhysics/Jolt — vendored at publish time rather than fetched
at install so installs stay self-contained and reproducible.

.npmignore is belt-and-suspenders against target/, pkg/, build/,
and the multi-MB Jolt extras (Samples/Docs/UnitTests/Assets/…).
scripts/prepack.sh refuses to publish if the Jolt submodule isn't
initialised — deliberately not auto-initing so we don't silently
publish stale refs. Root MIT LICENSE added so the legal terms
travel with the package.
---
 .npmignore         | 54 ++++++++++++++++++++++++++++++++
 LICENSE            | 21 +++++++++++++
 package.json       | 77 ++++++++++++++++++++++++++++++++++++++++++++--
 scripts/prepack.sh | 31 +++++++++++++++++++
 4 files changed, 181 insertions(+), 2 deletions(-)
 create mode 100644 .npmignore
 create mode 100644 LICENSE
 create mode 100755 scripts/prepack.sh

diff --git a/.npmignore b/.npmignore
new file mode 100644
index 0000000..b3278cf
--- /dev/null
+++ b/.npmignore
@@ -0,0 +1,54 @@
+# The `files:` field in package.json is the source of truth for what
+# ships. This .npmignore is belt-and-suspenders — it strips artifacts
+# from globbed directories that `files:` pulls in, so we never ship
+# build output, IDE state, or local caches even by accident.
+
+# Rust build output
+target/
+**/target/
+*.rlib
+*.rmeta
+
+# wasm-pack output (built per consumer; not part of the package)
+native/web/pkg/
+
+# Native build dirs
+native/third_party/bloom_jolt/build/
+native/third_party/JoltPhysics/Build/
+
+# Jolt submodule extras we don't need at consumer build time.
+# We only ship JoltPhysics/Jolt/ (the actual sources our cmake builds
+# against) plus LICENSE + README. Everything else is samples, viewer,
+# docs, assets, tests — multi-MB and irrelevant for embedding.
+native/third_party/JoltPhysics/.git
+native/third_party/JoltPhysics/.github/
+native/third_party/JoltPhysics/Assets/
+native/third_party/JoltPhysics/Build/
+native/third_party/JoltPhysics/Docs/
+native/third_party/JoltPhysics/HelloWorld/
+native/third_party/JoltPhysics/JoltViewer/
+native/third_party/JoltPhysics/PerformanceTest/
+native/third_party/JoltPhysics/Samples/
+native/third_party/JoltPhysics/TestFramework/
+native/third_party/JoltPhysics/UnitTests/
+native/third_party/JoltPhysics/Doxyfile
+native/third_party/JoltPhysics/run_doxygen.bat
+native/third_party/JoltPhysics/sonar-project.properties
+native/third_party/JoltPhysics/ContributorAgreement.md
+
+# Perry build artifacts
+*.ts.o
+*_ts.o
+.perry-cache/
+dist/
+
+# OS / editor junk
+.DS_Store
+.vscode/
+.idea/
+*.swp
+
+# Local-only state never meant for the registry
+.claude/
+node_modules/
+package-lock.json
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..580204d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Bloom Engine
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/package.json b/package.json
index b40c9c3..886c7ef 100644
--- a/package.json
+++ b/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "bloom",
+  "name": "@bloomengine/engine",
   "version": "0.3.1",
   "description": "Bloom Engine: native TypeScript game engine compiled by Perry",
   "main": "src/index.ts",
@@ -18,10 +18,83 @@
     "./physics": "./src/physics/index.ts",
     "./world": "./src/world/index.ts"
   },
+  "files": [
+    "src/",
+    "perry.config.ts",
+    "native/shared/Cargo.toml",
+    "native/shared/Cargo.lock",
+    "native/shared/build.rs",
+    "native/shared/src/**",
+    "native/shared/shaders/**",
+    "native/shared/assets/**",
+    "native/macos/Cargo.toml",
+    "native/macos/Cargo.lock",
+    "native/macos/src/**",
+    "native/ios/Cargo.toml",
+    "native/ios/Cargo.lock",
+    "native/ios/src/**",
+    "native/tvos/Cargo.toml",
+    "native/tvos/Cargo.lock",
+    "native/tvos/src/**",
+    "native/watchos/Cargo.toml",
+    "native/watchos/Cargo.lock",
+    "native/watchos/src/**",
+    "native/watchos/shaders/**",
+    "native/windows/Cargo.toml",
+    "native/windows/Cargo.lock",
+    "native/windows/src/**",
+    "native/linux/Cargo.toml",
+    "native/linux/Cargo.lock",
+    "native/linux/src/**",
+    "native/android/Cargo.toml",
+    "native/android/Cargo.lock",
+    "native/android/src/**",
+    "native/web/Cargo.toml",
+    "native/web/Cargo.lock",
+    "native/web/src/**",
+    "native/web/build.sh",
+    "native/web/index.html",
+    "native/web/bloom_glue.js",
+    "native/web/jolt_bridge.js",
+    "native/third_party/bloom_jolt/CMakeLists.txt",
+    "native/third_party/bloom_jolt/include/**",
+    "native/third_party/bloom_jolt/src/**",
+    "native/third_party/JoltPhysics/Jolt/**",
+    "native/third_party/JoltPhysics/LICENSE",
+    "native/third_party/JoltPhysics/README.md"
+  ],
+  "scripts": {
+    "prepack": "scripts/prepack.sh"
+  },
+  "keywords": [
+    "bloom",
+    "game-engine",
+    "perry",
+    "typescript",
+    "native",
+    "wgpu",
+    "metal",
+    "directx",
+    "vulkan",
+    "webgpu",
+    "jolt",
+    "physics"
+  ],
   "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/bloomengine/engine.git"
+  },
+  "bugs": {
+    "url": "https://github.com/bloomengine/engine/issues"
+  },
+  "homepage": "https://github.com/bloomengine/engine#readme",
+  "publishConfig": {
+    "access": "public"
+  },
   "perry": {
     "nativeLibrary": {
-      "module": "bloom",
+      "module": "@bloomengine/engine",
       "functions": [
         { "name": "bloom_init_window",              "params": ["f64", "f64", "i64", "f64"],                              "returns": "void" },
         { "name": "bloom_close_window",             "params": [],                                                        "returns": "void" },
diff --git a/scripts/prepack.sh b/scripts/prepack.sh
new file mode 100755
index 0000000..605ee5c
--- /dev/null
+++ b/scripts/prepack.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Runs immediately before `npm pack` / `npm publish` assembles the
+# tarball. Its job is to verify the working tree contains everything
+# the published package needs — most importantly, the JoltPhysics
+# submodule materialised on disk, since the tarball vendors its
+# sources rather than relying on a postinstall `git clone`.
+#
+# We deliberately do NOT auto-init the submodule here: doing so would
+# silently publish whatever ref the submodule happens to point at,
+# even if it's stale or uncommitted. A loud failure forces a
+# deliberate `git submodule update --init` before publishing.
+
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+JOLT_DIR="$ROOT/native/third_party/JoltPhysics"
+JOLT_SRC="$JOLT_DIR/Jolt"
+
+if [ ! -d "$JOLT_SRC" ] || [ -z "$(ls -A "$JOLT_SRC" 2>/dev/null)" ]; then
+  echo "prepack: JoltPhysics submodule is not initialised." >&2
+  echo "         Expected sources at: $JOLT_SRC" >&2
+  echo "         Run: git submodule update --init --recursive" >&2
+  exit 1
+fi
+
+if [ ! -f "$JOLT_DIR/LICENSE" ]; then
+  echo "prepack: JoltPhysics/LICENSE missing — refusing to publish without upstream license." >&2
+  exit 1
+fi
+
+echo "prepack: JoltPhysics sources present ($(du -sh "$JOLT_SRC" | cut -f1)). OK."

From 2dd358c194135c6ebaa2e3f1910d7465b0bae6cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ralph=20K=C3=BCpper?= <ralph.kuepper@skelpo.com>
Date: Sat, 16 May 2026 11:11:10 +0200
Subject: [PATCH 3/6] chore(pkg): fix GitHub URLs to match actual
 Bloom-Engine/engine remote

---
 package.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/package.json b/package.json
index 886c7ef..a03055b 100644
--- a/package.json
+++ b/package.json
@@ -83,12 +83,12 @@
   "license": "MIT",
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/bloomengine/engine.git"
+    "url": "git+https://github.com/Bloom-Engine/engine.git"
   },
   "bugs": {
-    "url": "https://github.com/bloomengine/engine/issues"
+    "url": "https://github.com/Bloom-Engine/engine/issues"
   },
-  "homepage": "https://github.com/bloomengine/engine#readme",
+  "homepage": "https://github.com/Bloom-Engine/engine#readme",
   "publishConfig": {
     "access": "public"
   },

From 04a3678acc4ed76267372f9d9a41a48de459e433 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ralph=20K=C3=BCpper?= <ralph.kuepper@skelpo.com>
Date: Sat, 16 May 2026 11:15:07 +0200
Subject: [PATCH 4/6] docs(pkg): npm install instructions and
 @bloomengine/engine imports

Now that the package is published to npm, swap every documented
import from "bloom" to "@bloomengine/engine" so the snippets actually
resolve against an installed package. README also gains an Install
section up front pointing at npm (plus bun/pnpm/yarn equivalents) and
the toolchain prereqs (Perry + Rust, wasm-pack for web).
---
 README.md                  | 44 ++++++++++++++++++++++++++++----------
 docs/skeletal-animation.md |  4 ++--
 docs/web-target.md         |  4 ++--
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 4f2670f..af125d3 100644
--- a/README.md
+++ b/README.md
@@ -5,11 +5,33 @@
 Write TypeScript. Ship native games — and now the web too.
 Bloom compiles your game to Metal, DirectX 12, Vulkan, OpenGL, and WebGPU — one codebase for every platform.
 
+## Install
+
+```bash
+npm install @bloomengine/engine
+```
+
+Or with your preferred package manager:
+
+```bash
+bun add @bloomengine/engine
+pnpm add @bloomengine/engine
+yarn add @bloomengine/engine
+```
+
+The npm package ships the TypeScript API alongside the engine's Rust sources and the bundled [JoltPhysics](https://github.com/jrouwe/JoltPhysics) C++ shim, so a single `install` is enough — there's no separate native download step.
+
+You'll also need:
+
+- **Perry** — the TypeScript AOT compiler that turns your game into a native binary or WASM module. It also drives the engine's native build.
+- **Rust toolchain** ([rustup.rs](https://rustup.rs)) — Perry invokes Cargo to compile the engine's platform crate the first time you build for each target.
+- For web builds only: [wasm-pack](https://rustwasm.github.io/wasm-pack/installer/) (`cargo install wasm-pack`).
+
 ## Quick Start
 
 ```typescript
 import { initWindow, windowShouldClose, beginDrawing,
-         endDrawing, clearBackground, drawText, Colors } from "bloom";
+         endDrawing, clearBackground, drawText, Colors } from "@bloomengine/engine";
 
 initWindow(800, 450, "My Game");
 
@@ -26,7 +48,7 @@ while (!windowShouldClose()) {
 Use `runGame()` for code that works on both native and web:
 
 ```typescript
-import { initWindow, runGame, clearBackground, drawText, Colors } from "bloom";
+import { initWindow, runGame, clearBackground, drawText, Colors } from "@bloomengine/engine";
 
 initWindow(800, 450, "My Game");
 
@@ -55,14 +77,14 @@ cd dist/web && python3 -m http.server 8080
 
 | Module | Import | Description |
 |--------|--------|-------------|
-| **Core** | `bloom/core` | Window, game loop, input, timing |
-| **Shapes** | `bloom/shapes` | 2D drawing + collision detection |
-| **Textures** | `bloom/textures` | Image loading, sprite batching |
-| **Text** | `bloom/text` | TTF/OTF font loading and rendering |
-| **Audio** | `bloom/audio` | Sound effects + music streaming |
-| **Models** | `bloom/models` | 3D model loading (glTF, OBJ), skeletal animation |
-| **Math** | `bloom/math` | Vectors, matrices, quaternions, easing |
-| **Physics** | `bloom/physics` | Jolt-backed rigid + soft bodies, character, vehicles ([docs](docs/physics.md)) |
+| **Core** | `@bloomengine/engine/core` | Window, game loop, input, timing |
+| **Shapes** | `@bloomengine/engine/shapes` | 2D drawing + collision detection |
+| **Textures** | `@bloomengine/engine/textures` | Image loading, sprite batching |
+| **Text** | `@bloomengine/engine/text` | TTF/OTF font loading and rendering |
+| **Audio** | `@bloomengine/engine/audio` | Sound effects + music streaming |
+| **Models** | `@bloomengine/engine/models` | 3D model loading (glTF, OBJ), skeletal animation |
+| **Math** | `@bloomengine/engine/math` | Vectors, matrices, quaternions, easing |
+| **Physics** | `@bloomengine/engine/physics` | Jolt-backed rigid + soft bodies, character, vehicles ([docs](docs/physics.md)) |
 
 ## Platforms
 
@@ -143,7 +165,7 @@ Bloom supports GPU-accelerated skeletal animation via glTF/GLB models. The pipel
 
 ```typescript
 import { loadModel, loadModelAnimation, updateModelAnimation, drawModel,
-         getTime, Colors } from "bloom";
+         getTime, Colors } from "@bloomengine/engine";
 
 const character = loadModel("assets/models/character.glb");
 const anim = loadModelAnimation("assets/models/character.glb");
diff --git a/docs/skeletal-animation.md b/docs/skeletal-animation.md
index f9c15f4..57d4d7d 100644
--- a/docs/skeletal-animation.md
+++ b/docs/skeletal-animation.md
@@ -136,7 +136,7 @@ Joint matrices are written to the GPU in `end_frame()` via `flush_joint_matrices
 ### Loading
 
 ```typescript
-import { loadModel, loadModelAnimation, drawModel, updateModelAnimation } from "bloom";
+import { loadModel, loadModelAnimation, drawModel, updateModelAnimation } from "@bloomengine/engine";
 
 // Load the mesh (vertices with skin data: JOINTS_0 + WEIGHTS_0)
 const model = loadModel("assets/models/character.glb");
@@ -179,7 +179,7 @@ drawModel(model, { x: playerX, y: playerY, z: playerZ }, 1.0, WHITE);
 ```typescript
 import { initWindow, windowShouldClose, beginDrawing, endDrawing,
          clearBackground, loadModel, loadModelAnimation,
-         updateModelAnimation, drawModel, getTime, Colors } from "bloom";
+         updateModelAnimation, drawModel, getTime, Colors } from "@bloomengine/engine";
 
 initWindow(800, 600, "Animation Demo");
 
diff --git a/docs/web-target.md b/docs/web-target.md
index 76719a4..b92c738 100644
--- a/docs/web-target.md
+++ b/docs/web-target.md
@@ -54,7 +54,7 @@ python3 -m http.server 8080
 Browsers cannot run blocking `while` loops. Use `runGame()` instead:
 
 ```typescript
-import { initWindow, runGame, clearBackground, drawRect, Colors } from "bloom";
+import { initWindow, runGame, clearBackground, drawRect, Colors } from "@bloomengine/engine";
 
 initWindow(800, 600, "My Game");
 
@@ -113,7 +113,7 @@ if (fileExists("save.json")) {
 ## Platform Detection
 
 ```typescript
-import { getPlatform, Platform } from "bloom";
+import { getPlatform, Platform } from "@bloomengine/engine";
 
 if (getPlatform() === Platform.WEB) {
   // web-specific code

From e3aef89432c7831a3d8622e8fe43ef42b0bddb46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ralph=20K=C3=BCpper?= <ralph.kuepper@skelpo.com>
Date: Sat, 16 May 2026 11:15:07 +0200
Subject: [PATCH 5/6] ci(release): publish @bloomengine/engine to npm on tag
 release

Adds a publish-npm job to the existing tag-driven release workflow.
Runs after github-release so a failed release can't leave a
package-but-no-release state (a published npm version is immutable,
whereas a GitHub Release can be recreated), and after the await-tests
gate so we never ship a tag that didn't pass CI.

Idempotent: re-checks npm before publishing and skips cleanly if the
version already exists (so workflow_dispatch on an old tag won't
double-publish). Checks out submodules recursively because the
prepack hook refuses to ship without JoltPhysics sources on disk,
and uses --provenance for the npm attestation badge.

Requires an NPM_TOKEN repo secret with publish rights on the
@bloomengine scope.
---
 .github/workflows/release.yml | 76 ++++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 68b32a6..436b87b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -2,11 +2,15 @@ name: Release
 
 # Fires on a version tag push (e.g. `v0.3.2`). Gates on the Tests workflow
 # passing for the exact same commit, then creates/updates the GitHub Release
-# and (once wired up) would publish to npm.
+# and publishes the package to npm as @bloomengine/engine.
 #
 # The /release Claude Code skill in .claude/skills/release/ drives this end
 # to end: it bumps the version in package.json, commits, tags, pushes, and
 # waits for this workflow to go green.
+#
+# Required secrets:
+#   NPM_TOKEN — automation token for the @bloomengine npm scope with
+#               "Publish" permission on @bloomengine/engine.
 
 on:
   push:
@@ -147,3 +151,73 @@ jobs:
           else
             echo "OK  package.json version matches tag ($VERSION)"
           fi
+
+  # ---------------------------------------------------------------------------
+  # Publish the package to npm as @bloomengine/engine. Runs after the GitHub
+  # Release so a failed release can't leave a package-but-no-release state
+  # (a published npm version is immutable). Skips cleanly if the version is
+  # already on the registry, which keeps re-runs idempotent (workflow_dispatch
+  # on an existing tag won't double-publish or fail).
+  #
+  # We check out submodules recursively because scripts/prepack.sh refuses
+  # to ship a tarball without the JoltPhysics sources materialised — the
+  # package vendors them rather than relying on a postinstall git clone.
+  # ---------------------------------------------------------------------------
+  publish-npm:
+    needs: github-release
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write   # required for npm provenance attestations
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - uses: actions/setup-node@v5
+        with:
+          node-version: "24"
+          registry-url: "https://registry.npmjs.org"
+
+      - name: Resolve tag
+        id: tag
+        env:
+          DISPATCH_TAG: ${{ github.event.inputs.tag }}
+        run: |
+          if [ -n "$DISPATCH_TAG" ]; then
+            TAG="$DISPATCH_TAG"
+          else
+            TAG="${GITHUB_REF#refs/tags/}"
+          fi
+          echo "tag=$TAG" >> "$GITHUB_OUTPUT"
+          echo "version=${TAG#v}" >> "$GITHUB_OUTPUT"
+
+      - name: Verify package.json version matches tag
+        env:
+          VERSION: ${{ steps.tag.outputs.version }}
+          TAG: ${{ steps.tag.outputs.tag }}
+        run: |
+          PKG_VERSION=$(node -p "require('./package.json').version")
+          if [ "$PKG_VERSION" != "$VERSION" ]; then
+            echo "::error::Tag $TAG ($VERSION) does not match package.json ($PKG_VERSION) — refusing to publish."
+            exit 1
+          fi
+
+      - name: Check if version already published
+        id: check
+        run: |
+          PKG_NAME=$(node -p "require('./package.json').name")
+          PKG_VERSION=$(node -p "require('./package.json').version")
+          if npm view "$PKG_NAME@$PKG_VERSION" version >/dev/null 2>&1; then
+            echo "$PKG_NAME@$PKG_VERSION is already on the registry — skipping publish."
+            echo "skip=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "$PKG_NAME@$PKG_VERSION not yet published — will publish."
+            echo "skip=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Publish to npm
+        if: steps.check.outputs.skip == 'false'
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+        run: npm publish --provenance --access public

From ed90787d101b3d3a3d2536bde600a51cdd2e7a30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ralph=20K=C3=BCpper?= <ralph.kuepper@skelpo.com>
Date: Sat, 16 May 2026 11:17:15 +0200
Subject: [PATCH 6/6] ci(release): drop NPM_TOKEN, use npm trusted publishing
 via OIDC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The package is now configured on npmjs.com with this workflow as a
trusted publisher, so `id-token: write` is sufficient — npm publish
exchanges the GitHub OIDC token for a short-lived credential. No
long-lived NPM_TOKEN secret to rotate or leak, and provenance
attestation is automatic.
---
 .github/workflows/release.yml | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 436b87b..97a69da 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -8,9 +8,12 @@ name: Release
 # to end: it bumps the version in package.json, commits, tags, pushes, and
 # waits for this workflow to go green.
 #
-# Required secrets:
-#   NPM_TOKEN — automation token for the @bloomengine npm scope with
-#               "Publish" permission on @bloomengine/engine.
+# Authentication: npm trusted publishing. The @bloomengine/engine package is
+# configured on npmjs.com with this workflow (Bloom-Engine/engine →
+# .github/workflows/release.yml → job publish-npm) as a trusted publisher.
+# `id-token: write` on the publish job is enough — `npm publish` exchanges
+# the GitHub OIDC token for a short-lived publish credential, no NPM_TOKEN
+# secret needed. Provenance attestation is automatic under this flow.
 
 on:
   push:
@@ -218,6 +221,4 @@ jobs:
 
       - name: Publish to npm
         if: steps.check.outputs.skip == 'false'
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
         run: npm publish --provenance --access public