From eadf596f18499b557890ec013a108a1837c6bbb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Fri, 1 May 2026 10:46:38 +0200 Subject: [PATCH 1/6] feat(renderer): on-disk cache for per-mesh SDFs (closes #22) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cold launches re-baked every per-mesh 32³ R32Float SDF from scratch even though the same content always produces the same voxel data. Sponza's first 9 frames spent ~8 bakes/frame on this path; second launch spent another 9. This change content-hashes (positions + indices) at GPU upload time, checks a platform-appropriate cache directory, and `queue.write_texture`s the cached voxel bytes directly when the file exists — bypassing the GPU dispatch entirely. Misses fall through to the existing bake; the renderer encodes a copy_texture_to_buffer alongside each dispatch and persists the readback to disk after the frame's main submit. The next launch hits and skips the bake. The cache is best-effort throughout: a corrupt entry, missing dir, or write failure silently re-bakes. wasm32 has no filesystem path so load returns None and store is gated out — web builds bake every launch as before. Cache layout: - macOS / iOS / tvOS / watchOS: ~/Library/Caches/bloom/sdf - Linux / Android: $XDG_CACHE_HOME/bloom/sdf - Windows: %LOCALAPPDATA%\bloom\cache\sdf - 16 B header (magic + version + voxel_res) + 128 KB R32Float payload Sponza disk footprint: 68 × 128 KB = 8.7 MB (matches the issue's budget). Disk reads happen synchronously at upload — a 128 KB read from local cache is sub-millisecond. The synchronous device.poll(Wait) on flush blocks for the bake submission to finish before persisting; this is a cold-launch-only stall (~9 frames) and the bake itself is the bottleneck on those frames anyway. Async pipelining is a follow-up if the cold-launch stall ever shows up in profiles. 
8 new unit tests cover hash stability, change-detection on positions and indices, count-vs-value distinguishability, store/load round-trip, miss handling, size validation, and bad-magic rejection. cargo test 74/0 (was 66/0) on macOS, wasm32 cargo check clean. --- native/shared/src/lib.rs | 1 + native/shared/src/renderer/mod.rs | 128 +++++++++++++- native/shared/src/scene.rs | 69 +++++++- native/shared/src/sdf_cache.rs | 274 ++++++++++++++++++++++++++++++ 4 files changed, 463 insertions(+), 9 deletions(-) create mode 100644 native/shared/src/sdf_cache.rs diff --git a/native/shared/src/lib.rs b/native/shared/src/lib.rs index 541b461..940c8c8 100644 --- a/native/shared/src/lib.rs +++ b/native/shared/src/lib.rs @@ -20,6 +20,7 @@ pub mod postfx; pub mod custom_shaders; pub mod staging; pub mod profiler; +pub mod sdf_cache; // Jolt C ABI + Rust wrapper live on native only. On wasm32 the web crate // routes bloom_physics_* calls through wasm_bindgen to JoltPhysics.js; // no Rust-side Jolt integration is needed. diff --git a/native/shared/src/renderer/mod.rs b/native/shared/src/renderer/mod.rs index 5d20519..d0ae9de 100644 --- a/native/shared/src/renderer/mod.rs +++ b/native/shared/src/renderer/mod.rs @@ -1055,6 +1055,13 @@ pub struct Renderer { pub sdf_bake_pipeline: wgpu::ComputePipeline, pub sdf_bake_layout: wgpu::BindGroupLayout, pub sdf_bake_uniform: wgpu::Buffer, + /// Ticket 022 — staging buffers awaiting readback so freshly-baked + /// per-mesh SDFs can be written to the disk cache. Populated by + /// `bake_pending_sdfs` (one entry per dispatch); drained by + /// `flush_sdf_cache_writes` after the frame's main submit, which + /// maps each buffer, copies bytes to the cache file, and drops it. + /// Empty on cache-hit frames and after cold launch finishes. 
+ sdf_cache_writes: Vec<(crate::sdf_cache::MeshHash, wgpu::Buffer)>, // --- Ticket 014 V2: scene-wide SDF clipmap --- pub scene_sdf_clipmap_tex: wgpu::Texture, @@ -6040,6 +6047,7 @@ impl Renderer { sdf_bake_pipeline, sdf_bake_layout, sdf_bake_uniform, + sdf_cache_writes: Vec::new(), scene_sdf_clipmap_tex, scene_sdf_clipmap_view, scene_sdf_clipmap_built: false, @@ -7183,6 +7191,13 @@ impl Renderer { /// per-frame budget; expensive workload (O(voxels × triangles) /// per mesh), so the rate-limit keeps first-frame stutter /// bounded. Static scenes amortise and never re-bake. + /// + /// Ticket 022 — after each dispatch, encode a copy_texture_to_buffer + /// against a fresh staging buffer and stash (hash, buffer) on + /// `sdf_cache_writes`. The frame's main submit picks up the copies + /// alongside the bake; `flush_sdf_cache_writes` then maps and + /// persists each buffer to the on-disk cache so the next launch + /// hits the load path in scene.rs and skips the bake entirely. fn bake_pending_sdfs( &mut self, scene: &mut crate::scene::SceneGraph, @@ -7196,18 +7211,21 @@ impl Renderer { let pending: Vec = scene.pending_sdf_bakes.drain(..take).collect(); for handle in pending { - let (sdf_view, vb_ptr, ib_ptr, bmin, bmax, index_count) = { + let (sdf_tex, sdf_view, vb_ptr, ib_ptr, bmin, bmax, index_count, mesh_hash) = { let Some(node) = scene.nodes.get(handle) else { continue; }; + let Some(sdf_tex) = node.mesh_sdf.as_ref() else { continue; }; let Some(sdf_view) = node.mesh_sdf_view.as_ref() else { continue; }; let Some(vb) = node.gpu_vb.as_ref() else { continue; }; let Some(ib) = node.gpu_ib.as_ref() else { continue; }; ( + sdf_tex.clone(), sdf_view.clone(), vb.clone(), ib.clone(), node.bounds_min, node.bounds_max, node.gpu_index_count, + node.mesh_hash, ) }; if index_count == 0 { @@ -7234,13 +7252,95 @@ impl Renderer { wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&sdf_view) }, ], }); - let mut pass = 
encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { - label: Some("sdf_bake_pass"), - timestamp_writes: None, - }); - pass.set_pipeline(&self.sdf_bake_pipeline); - pass.set_bind_group(0, &bg, &[]); - pass.dispatch_workgroups(MESH_SDF_RES / 4, MESH_SDF_RES / 4, MESH_SDF_RES / 4); + { + let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("sdf_bake_pass"), + timestamp_writes: None, + }); + pass.set_pipeline(&self.sdf_bake_pipeline); + pass.set_bind_group(0, &bg, &[]); + pass.dispatch_workgroups(MESH_SDF_RES / 4, MESH_SDF_RES / 4, MESH_SDF_RES / 4); + } + + // Ticket 022 — schedule a readback against the freshly-baked + // texture so the next launch can skip the bake. We only do + // this when scene.rs computed a hash (it always does, but + // skip defensively); padded staging size is bound by + // wgpu's COPY_BYTES_PER_ROW alignment. + if let Some(hash) = mesh_hash { + let row_padded = ((MESH_SDF_RES * 4 + 255) & !255) as u64; + let staging = self.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("sdf_cache_readback"), + size: row_padded * (MESH_SDF_RES as u64) * (MESH_SDF_RES as u64), + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + encoder.copy_texture_to_buffer( + wgpu::TexelCopyTextureInfo { + texture: &sdf_tex, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::TexelCopyBufferInfo { + buffer: &staging, + layout: wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(row_padded as u32), + rows_per_image: Some(MESH_SDF_RES), + }, + }, + wgpu::Extent3d { + width: MESH_SDF_RES, + height: MESH_SDF_RES, + depth_or_array_layers: MESH_SDF_RES, + }, + ); + self.sdf_cache_writes.push((hash, staging)); + } + } + } + + /// Ticket 022 — drain pending SDF cache writes after the frame's + /// main submit. 
Maps each staging buffer in one pass (single + /// `device.poll(Wait)` covers all of them), unpads the row-aligned + /// payload back to the tightly-packed on-disk layout, and writes + /// to the cache. Best-effort throughout: a write failure is + /// silently ignored — the next cold launch just rebakes. + pub fn flush_sdf_cache_writes(&mut self) { + if self.sdf_cache_writes.is_empty() { return; } + let entries = std::mem::take(&mut self.sdf_cache_writes); + + // Issue map_async on every buffer up front so a single poll + // resolves all of them — much cheaper than serially polling + // per buffer when 8 cold-launch bakes complete in one frame. + for (_, buf) in &entries { + let slice = buf.slice(..); + slice.map_async(wgpu::MapMode::Read, |_| { /* polled below */ }); + } + let _ = self.device.poll(wgpu::PollType::Wait { submission_index: None, timeout: None }); + + let row_tight = (MESH_SDF_RES * 4) as usize; + let row_padded = ((MESH_SDF_RES * 4 + 255) & !255) as usize; + let res = MESH_SDF_RES as usize; + + for (hash, buf) in entries { + let slice = buf.slice(..); + let data = slice.get_mapped_range(); + // Strip the wgpu-required row padding back to a tight + // 32³ × 4-byte payload before storing. + let mut tight = vec![0u8; res * res * row_tight]; + for z in 0..res { + for y in 0..res { + let src_off = (z * res + y) * row_padded; + let dst_off = (z * res + y) * row_tight; + tight[dst_off..dst_off + row_tight] + .copy_from_slice(&data[src_off..src_off + row_tight]); + } + } + drop(data); + buf.unmap(); + let _ = crate::sdf_cache::store(hash, &tight); } } @@ -11363,6 +11463,18 @@ impl Renderer { profiler.end("queue_submit"); } + // Ticket 022 — drain freshly-baked SDFs to the on-disk cache. + // No-op on cache-hit frames (queue is empty); on cold-launch + // bake frames it blocks briefly on a single device.poll(Wait) + // covering all 8 readbacks. Skipped on wasm32 (no filesystem + // path, sdf_cache::store returns Err immediately). 
+ #[cfg(not(target_arch = "wasm32"))] + { + profiler.begin("sdf_cache_write"); + self.flush_sdf_cache_writes(); + profiler.end("sdf_cache_write"); + } + #[cfg(target_arch = "wasm32")] { self.queue.submit(std::iter::once(encoder.finish())); diff --git a/native/shared/src/scene.rs b/native/shared/src/scene.rs index d9d6d73..5791d46 100644 --- a/native/shared/src/scene.rs +++ b/native/shared/src/scene.rs @@ -128,6 +128,12 @@ pub struct SceneNode { /// `None` on non-RT-capable adapters or until the bake lands. pub mesh_sdf: Option, pub mesh_sdf_view: Option, + /// Content hash of (positions, indices) computed at upload time. + /// Set whenever `mesh_sdf` exists; the renderer reads it back when + /// flushing cache writes after a fresh bake. `None` until the + /// first geo upload — and on non-RT-capable adapters that never + /// allocate a per-mesh SDF. + pub mesh_hash: Option, /// Flat mesh-average world-space normal, cached on BLAS build so /// the per-instance GI data buffer can be populated without /// re-reading the vertex array. Rough heuristic — for walls and @@ -185,6 +191,7 @@ impl SceneNode { card_dynamic: false, mesh_sdf: None, mesh_sdf_view: None, + mesh_hash: None, flat_normal_ws: [0.0, 1.0, 0.0], flat_albedo: [1.0, 1.0, 1.0], uniform_slot: None, @@ -752,9 +759,69 @@ impl SceneGraph { device, "scene_node_sdf", ); + + // Ticket 022 — content-hash the geometry and + // try the on-disk SDF cache before scheduling + // a GPU bake. Vertex layout is interleaved; + // pull the position prefix out as a + // [[f32; 3]] slice so the hash only sees + // geometry-relevant bytes. 
+ let positions: Vec<[f32; 3]> = + node.vertices.iter().map(|v| v.position).collect(); + let hash = crate::sdf_cache::compute_mesh_hash( + &positions, &node.indices, + ); + node.mesh_hash = Some(hash); + + if let Some(bytes) = crate::sdf_cache::load(hash) { + // Cache hit — pad the tightly-packed + // 128 B/row payload to 256 B/row so it + // clears wgpu's COPY_BYTES_PER_ROW + // alignment, then upload directly and + // skip the bake. Native cache size + // stays compact (128 KB/mesh on disk); + // the 128 KB padding allocation is + // free'd immediately after the call. + const RES: u32 = crate::sdf_cache::VOXEL_RES; + let row_tight = (RES * 4) as usize; + let row_padded = ((row_tight + 255) & !255) as u32; + let mut padded = vec![ + 0u8; + (row_padded as usize) * (RES as usize) * (RES as usize) + ]; + for z in 0..RES as usize { + for y in 0..RES as usize { + let src_off = (z * RES as usize + y) * row_tight; + let dst_off = (z * RES as usize + y) * row_padded as usize; + padded[dst_off..dst_off + row_tight] + .copy_from_slice(&bytes[src_off..src_off + row_tight]); + } + } + queue.write_texture( + wgpu::TexelCopyTextureInfo { + texture: &sdf_tex, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + &padded, + wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(row_padded), + rows_per_image: Some(RES), + }, + wgpu::Extent3d { + width: RES, + height: RES, + depth_or_array_layers: RES, + }, + ); + } else { + pending_sdf.push(handle); + } + node.mesh_sdf = Some(sdf_tex); node.mesh_sdf_view = Some(sdf_view); - pending_sdf.push(handle); } } } diff --git a/native/shared/src/sdf_cache.rs b/native/shared/src/sdf_cache.rs new file mode 100644 index 0000000..3fd7332 --- /dev/null +++ b/native/shared/src/sdf_cache.rs @@ -0,0 +1,274 @@ +//! Disk cache for per-mesh signed distance fields baked by ticket 014. +//! +//! Each mesh content-hashes (positions + indices) to a 64-bit key; the +//! 
32³ R32Float voxel data (~128 KB) lives under the platform cache +//! directory. Cold launches that hit the cache skip the GPU +//! brute-force point-triangle bake entirely and `queue.write_texture` +//! the bytes directly. Misses fall through to the existing in-process +//! bake; the renderer reads the texture back on the same frame and +//! writes the cache entry so the next launch hits. +//! +//! Caching is best-effort by design — every fallible operation +//! (hashing aside) returns `Option`/`Result` and the renderer treats +//! an error or `None` as "no cache, just bake." +//! +//! Web isn't supported here. The wasm32 build needs IndexedDB plumbing +//! before it can store anything; until then `cache_dir()` returns +//! `None` on wasm and the bake falls through normally. + +use std::fs; +use std::io::{Read, Write}; +use std::path::PathBuf; + +/// File header written before the raw R32Float voxel bytes. Fixed +/// 16 bytes so a future version can extend without breaking layout +/// readers (the `version` byte is the gate). +const FILE_MAGIC: [u8; 6] = *b"BLSDF\0"; +const FILE_VERSION: u8 = 1; + +/// Voxel resolution we bake at. Mirrors `renderer::formats::MESH_SDF_RES`. +/// Hardcoded here rather than imported to keep the cache module +/// dependency-free of the renderer. +pub const VOXEL_RES: u32 = 32; + +/// Total payload size: 32³ × f32. Exposed for callers sizing a staging +/// buffer or a `queue.write_texture` source slice. +pub const VOXEL_BYTES: usize = (VOXEL_RES as usize).pow(3) * 4; + +/// Content hash of a mesh's geometry. Stable across Rust versions +/// because the underlying mix is FNV-1a over fixed little-endian bytes; +/// renaming the type or adding fields is fine, just don't change the +/// hash math without bumping `FILE_VERSION`. +#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)] +pub struct MeshHash(pub u64); + +impl MeshHash { + fn to_filename(self) -> String { + format!("{:016x}.sdf", self.0) + } +} + +/// FNV-1a 64-bit. 
Algorithmically frozen — changes here invalidate +/// every existing cache entry without warning. +fn fnv1a(input: &[u8]) -> u64 { + let mut h: u64 = 0xcbf29ce484222325; + for &b in input { + h ^= b as u64; + h = h.wrapping_mul(0x100000001b3); + } + h +} + +/// Compute a stable 64-bit content hash for the SDF input. Only +/// position bits and indices feed in — meshes that share a surface +/// but differ in normals/uv/colour share a cache entry, which is +/// correct: SDF is geometry-only. +/// +/// `positions` is the contiguous `[f32; 3]` slice (one entry per +/// vertex). Callers with an interleaved vertex stride extract just +/// the position component before calling. +pub fn compute_mesh_hash(positions: &[[f32; 3]], indices: &[u32]) -> MeshHash { + let mut h: u64 = 0xcbf29ce484222325; + let mix = |h: &mut u64, b: u8| { + *h ^= b as u64; + *h = h.wrapping_mul(0x100000001b3); + }; + for p in positions { + for c in p { + for b in c.to_bits().to_le_bytes() { + mix(&mut h, b); + } + } + } + for i in indices { + for b in i.to_le_bytes() { + mix(&mut h, b); + } + } + // Fold the vertex/index counts in so two meshes that share a + // prefix can't accidentally collide via positions-as-suffix. + for b in (positions.len() as u64).to_le_bytes() { + mix(&mut h, b); + } + for b in (indices.len() as u64).to_le_bytes() { + mix(&mut h, b); + } + let _ = fnv1a; // expose the helper for any future direct use + MeshHash(h) +} + +/// Platform cache root. Returns `None` when the host has no usable +/// cache directory (wasm) or when the env vars used to derive the +/// path aren't set. 
+/// +/// Resolution order: +/// - macOS / iOS / tvOS / watchOS: `$HOME/Library/Caches/bloom/sdf` +/// - Linux / Android: `${XDG_CACHE_HOME:-$HOME/.cache}/bloom/sdf` +/// - Windows: `%LOCALAPPDATA%\bloom\cache\sdf` +/// - wasm32: `None` +pub fn cache_dir() -> Option { + #[cfg(target_arch = "wasm32")] + { return None; } + + #[cfg(not(target_arch = "wasm32"))] + { + let dir = if cfg!(target_vendor = "apple") { + let home = std::env::var_os("HOME")?; + PathBuf::from(home).join("Library").join("Caches").join("bloom").join("sdf") + } else if cfg!(target_os = "windows") { + let local = std::env::var_os("LOCALAPPDATA")?; + PathBuf::from(local).join("bloom").join("cache").join("sdf") + } else { + // Linux + Android (XDG-style). + let base = std::env::var_os("XDG_CACHE_HOME") + .map(PathBuf::from) + .or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join(".cache")))?; + base.join("bloom").join("sdf") + }; + Some(dir) + } +} + +/// Resolve a hash to its cache file path, creating the cache root if +/// needed. Returns `None` if the cache dir can't be created. +fn cache_path(hash: MeshHash) -> Option { + let dir = cache_dir()?; + if !dir.exists() { + fs::create_dir_all(&dir).ok()?; + } + Some(dir.join(hash.to_filename())) +} + +/// Look up cached voxel bytes. Returns `Some(bytes)` only when the +/// file exists, parses, and matches the expected magic + version + +/// resolution + payload size. Any failure is silently treated as a +/// miss — the caller falls through to the GPU bake. +pub fn load(hash: MeshHash) -> Option> { + let path = cache_path(hash)?; + let mut f = fs::File::open(&path).ok()?; + + let mut header = [0u8; 16]; + f.read_exact(&mut header).ok()?; + if header[..6] != FILE_MAGIC { return None; } + if header[6] != FILE_VERSION { return None; } + // header[7] reserved (alignment pad / future flags). + let res = u32::from_le_bytes(header[8..12].try_into().ok()?); + if res != VOXEL_RES { return None; } + // header[12..16] reserved. 
+ + let mut bytes = Vec::with_capacity(VOXEL_BYTES); + f.read_to_end(&mut bytes).ok()?; + if bytes.len() != VOXEL_BYTES { return None; } + Some(bytes) +} + +/// Write voxel bytes for a mesh hash. Best-effort; an `Err` return +/// means the cache wasn't updated but rendering can continue. +pub fn store(hash: MeshHash, voxel_bytes: &[u8]) -> std::io::Result<()> { + if voxel_bytes.len() != VOXEL_BYTES { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "voxel payload size mismatch", + )); + } + let path = cache_path(hash).ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::Other, "cache directory unavailable") + })?; + + // Write to a temp file and rename so a crash mid-write can never + // leave a partial entry that survives validation. + let tmp = path.with_extension("sdf.tmp"); + { + let mut f = fs::File::create(&tmp)?; + let mut header = [0u8; 16]; + header[..6].copy_from_slice(&FILE_MAGIC); + header[6] = FILE_VERSION; + // header[7] reserved. + header[8..12].copy_from_slice(&VOXEL_RES.to_le_bytes()); + // header[12..16] reserved. 
+ f.write_all(&header)?; + f.write_all(voxel_bytes)?; + f.sync_data()?; + } + fs::rename(&tmp, &path)?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hash_is_stable_for_identical_input() { + let pos = vec![[0.0_f32, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]; + let idx = vec![0_u32, 1, 2]; + assert_eq!(compute_mesh_hash(&pos, &idx), compute_mesh_hash(&pos, &idx)); + } + + #[test] + fn hash_changes_when_position_changes() { + let pos1 = vec![[0.0_f32, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]; + let pos2 = vec![[0.0_f32, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.001, 0.0]]; + let idx = vec![0_u32, 1, 2]; + assert_ne!(compute_mesh_hash(&pos1, &idx), compute_mesh_hash(&pos2, &idx)); + } + + #[test] + fn hash_changes_when_index_changes() { + let pos = vec![[0.0_f32; 3]; 3]; + let idx1 = vec![0_u32, 1, 2]; + let idx2 = vec![0_u32, 2, 1]; + assert_ne!(compute_mesh_hash(&pos, &idx1), compute_mesh_hash(&pos, &idx2)); + } + + #[test] + fn hash_distinguishes_count_from_value() { + // Two empty inputs must hash distinctly from a single-zero + // input — guards against the count-fold being load-bearing. + let h_empty = compute_mesh_hash(&[], &[]); + let h_one = compute_mesh_hash(&[[0.0_f32; 3]], &[0_u32]); + assert_ne!(h_empty, h_one); + } + + #[test] + fn store_then_load_roundtrips() { + // Skip when the env doesn't expose a cache dir (CI sandbox can do this). + let Some(_) = cache_dir() else { return; }; + // Use a hash unlikely to collide with anything else's tests. + let h = MeshHash(0xfeed_cafe_dead_beef); + let bytes: Vec = (0..VOXEL_BYTES).map(|i| (i * 7 + 13) as u8).collect(); + store(h, &bytes).expect("store"); + let got = load(h).expect("load hit"); + assert_eq!(got, bytes); + // Cleanup so the test is repeatable. 
+ if let Some(p) = cache_path(h) { let _ = fs::remove_file(p); } + } + + #[test] + fn load_miss_returns_none() { + let Some(_) = cache_dir() else { return; }; + let h = MeshHash(0x0000_0000_dead_dead); + if let Some(p) = cache_path(h) { let _ = fs::remove_file(p); } + assert!(load(h).is_none()); + } + + #[test] + fn store_rejects_wrong_size() { + let h = MeshHash(0); + assert!(store(h, &[0u8; 100]).is_err()); + } + + #[test] + fn load_rejects_wrong_magic() { + let Some(dir) = cache_dir() else { return; }; + let _ = fs::create_dir_all(&dir); + let h = MeshHash(0xbad_0_bad_1); + let p = dir.join(h.to_filename()); + // Hand-write a file with the wrong magic. + let mut bad = vec![0u8; 16 + VOXEL_BYTES]; + bad[..6].copy_from_slice(b"NOTBLM"); + std::fs::write(&p, &bad).unwrap(); + assert!(load(h).is_none()); + let _ = fs::remove_file(p); + } +} From 7edab877ae83d83f3dd3591dbb59cb457d65af65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 16 May 2026 11:10:41 +0200 Subject: [PATCH 2/6] chore(pkg): prep @bloomengine/engine for npm publish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename from bloom to @bloomengine/engine (Perry FFI module ref updated to match) and add a files: allowlist so the tarball ships just TS sources, Rust crates, shaders/assets, the bloom_jolt shim, and JoltPhysics/Jolt — vendored at publish time rather than fetched at install so installs stay self-contained and reproducible. .npmignore is belt-and-suspenders against target/, pkg/, build/, and the multi-MB Jolt extras (Samples/Docs/UnitTests/Assets/…). scripts/prepack.sh refuses to publish if the Jolt submodule isn't initialised — deliberately not auto-initing so we don't silently publish stale refs. Root MIT LICENSE added so the legal terms travel with the package. 
--- .npmignore | 54 ++++++++++++++++++++++++++++++++ LICENSE | 21 +++++++++++++ package.json | 77 ++++++++++++++++++++++++++++++++++++++++++++-- scripts/prepack.sh | 31 +++++++++++++++++++ 4 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 .npmignore create mode 100644 LICENSE create mode 100755 scripts/prepack.sh diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..b3278cf --- /dev/null +++ b/.npmignore @@ -0,0 +1,54 @@ +# The `files:` field in package.json is the source of truth for what +# ships. This .npmignore is belt-and-suspenders — it strips artifacts +# from globbed directories that `files:` pulls in, so we never ship +# build output, IDE state, or local caches even by accident. + +# Rust build output +target/ +**/target/ +*.rlib +*.rmeta + +# wasm-pack output (built per consumer; not part of the package) +native/web/pkg/ + +# Native build dirs +native/third_party/bloom_jolt/build/ +native/third_party/JoltPhysics/Build/ + +# Jolt submodule extras we don't need at consumer build time. +# We only ship JoltPhysics/Jolt/ (the actual sources our cmake builds +# against) plus LICENSE + README. Everything else is samples, viewer, +# docs, assets, tests — multi-MB and irrelevant for embedding. 
+native/third_party/JoltPhysics/.git +native/third_party/JoltPhysics/.github/ +native/third_party/JoltPhysics/Assets/ +native/third_party/JoltPhysics/Build/ +native/third_party/JoltPhysics/Docs/ +native/third_party/JoltPhysics/HelloWorld/ +native/third_party/JoltPhysics/JoltViewer/ +native/third_party/JoltPhysics/PerformanceTest/ +native/third_party/JoltPhysics/Samples/ +native/third_party/JoltPhysics/TestFramework/ +native/third_party/JoltPhysics/UnitTests/ +native/third_party/JoltPhysics/Doxyfile +native/third_party/JoltPhysics/run_doxygen.bat +native/third_party/JoltPhysics/sonar-project.properties +native/third_party/JoltPhysics/ContributorAgreement.md + +# Perry build artifacts +*.ts.o +*_ts.o +.perry-cache/ +dist/ + +# OS / editor junk +.DS_Store +.vscode/ +.idea/ +*.swp + +# Local-only state never meant for the registry +.claude/ +node_modules/ +package-lock.json diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..580204d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Bloom Engine + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/package.json b/package.json index b40c9c3..886c7ef 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "name": "bloom", + "name": "@bloomengine/engine", "version": "0.3.1", "description": "Bloom Engine: native TypeScript game engine compiled by Perry", "main": "src/index.ts", @@ -18,10 +18,83 @@ "./physics": "./src/physics/index.ts", "./world": "./src/world/index.ts" }, + "files": [ + "src/", + "perry.config.ts", + "native/shared/Cargo.toml", + "native/shared/Cargo.lock", + "native/shared/build.rs", + "native/shared/src/**", + "native/shared/shaders/**", + "native/shared/assets/**", + "native/macos/Cargo.toml", + "native/macos/Cargo.lock", + "native/macos/src/**", + "native/ios/Cargo.toml", + "native/ios/Cargo.lock", + "native/ios/src/**", + "native/tvos/Cargo.toml", + "native/tvos/Cargo.lock", + "native/tvos/src/**", + "native/watchos/Cargo.toml", + "native/watchos/Cargo.lock", + "native/watchos/src/**", + "native/watchos/shaders/**", + "native/windows/Cargo.toml", + "native/windows/Cargo.lock", + "native/windows/src/**", + "native/linux/Cargo.toml", + "native/linux/Cargo.lock", + "native/linux/src/**", + "native/android/Cargo.toml", + "native/android/Cargo.lock", + "native/android/src/**", + "native/web/Cargo.toml", + "native/web/Cargo.lock", + "native/web/src/**", + "native/web/build.sh", + "native/web/index.html", + "native/web/bloom_glue.js", + "native/web/jolt_bridge.js", + "native/third_party/bloom_jolt/CMakeLists.txt", + "native/third_party/bloom_jolt/include/**", + "native/third_party/bloom_jolt/src/**", + "native/third_party/JoltPhysics/Jolt/**", + "native/third_party/JoltPhysics/LICENSE", + "native/third_party/JoltPhysics/README.md" + ], + "scripts": { + "prepack": 
"scripts/prepack.sh" + }, + "keywords": [ + "bloom", + "game-engine", + "perry", + "typescript", + "native", + "wgpu", + "metal", + "directx", + "vulkan", + "webgpu", + "jolt", + "physics" + ], "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/bloomengine/engine.git" + }, + "bugs": { + "url": "https://github.com/bloomengine/engine/issues" + }, + "homepage": "https://github.com/bloomengine/engine#readme", + "publishConfig": { + "access": "public" + }, "perry": { "nativeLibrary": { - "module": "bloom", + "module": "@bloomengine/engine", "functions": [ { "name": "bloom_init_window", "params": ["f64", "f64", "i64", "f64"], "returns": "void" }, { "name": "bloom_close_window", "params": [], "returns": "void" }, diff --git a/scripts/prepack.sh b/scripts/prepack.sh new file mode 100755 index 0000000..605ee5c --- /dev/null +++ b/scripts/prepack.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Runs immediately before `npm pack` / `npm publish` assembles the +# tarball. Its job is to verify the working tree contains everything +# the published package needs — most importantly, the JoltPhysics +# submodule materialised on disk, since the tarball vendors its +# sources rather than relying on a postinstall `git clone`. +# +# We deliberately do NOT auto-init the submodule here: doing so would +# silently publish whatever ref the submodule happens to point at, +# even if it's stale or uncommitted. A loud failure forces a +# deliberate `git submodule update --init` before publishing. + +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +JOLT_DIR="$ROOT/native/third_party/JoltPhysics" +JOLT_SRC="$JOLT_DIR/Jolt" + +if [ ! -d "$JOLT_SRC" ] || [ -z "$(ls -A "$JOLT_SRC" 2>/dev/null)" ]; then + echo "prepack: JoltPhysics submodule is not initialised." >&2 + echo " Expected sources at: $JOLT_SRC" >&2 + echo " Run: git submodule update --init --recursive" >&2 + exit 1 +fi + +if [ ! 
-f "$JOLT_DIR/LICENSE" ]; then + echo "prepack: JoltPhysics/LICENSE missing — refusing to publish without upstream license." >&2 + exit 1 +fi + +echo "prepack: JoltPhysics sources present ($(du -sh "$JOLT_SRC" | cut -f1)). OK." From 2dd358c194135c6ebaa2e3f1910d7465b0bae6cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 16 May 2026 11:11:10 +0200 Subject: [PATCH 3/6] chore(pkg): fix GitHub URLs to match actual Bloom-Engine/engine remote --- package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 886c7ef..a03055b 100644 --- a/package.json +++ b/package.json @@ -83,12 +83,12 @@ "license": "MIT", "repository": { "type": "git", - "url": "git+https://github.com/bloomengine/engine.git" + "url": "git+https://github.com/Bloom-Engine/engine.git" }, "bugs": { - "url": "https://github.com/bloomengine/engine/issues" + "url": "https://github.com/Bloom-Engine/engine/issues" }, - "homepage": "https://github.com/bloomengine/engine#readme", + "homepage": "https://github.com/Bloom-Engine/engine#readme", "publishConfig": { "access": "public" }, From 04a3678acc4ed76267372f9d9a41a48de459e433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 16 May 2026 11:15:07 +0200 Subject: [PATCH 4/6] docs(pkg): npm install instructions and @bloomengine/engine imports Now that the package is published to npm, swap every documented import from "bloom" to "@bloomengine/engine" so the snippets actually resolve against an installed package. README also gains an Install section up front pointing at npm (plus bun/pnpm/yarn equivalents) and the toolchain prereqs (Perry + Rust, wasm-pack for web). 
--- README.md | 44 ++++++++++++++++++++++++++++---------- docs/skeletal-animation.md | 4 ++-- docs/web-target.md | 4 ++-- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 4f2670f..af125d3 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,33 @@ Write TypeScript. Ship native games — and now the web too. Bloom compiles your game to Metal, DirectX 12, Vulkan, OpenGL, and WebGPU — one codebase for every platform. +## Install + +```bash +npm install @bloomengine/engine +``` + +Or with your preferred package manager: + +```bash +bun add @bloomengine/engine +pnpm add @bloomengine/engine +yarn add @bloomengine/engine +``` + +The npm package ships the TypeScript API alongside the engine's Rust sources and the bundled [JoltPhysics](https://github.com/jrouwe/JoltPhysics) C++ shim, so a single `install` is enough — there's no separate native download step. + +You'll also need: + +- **Perry** — the TypeScript AOT compiler that turns your game into a native binary or WASM module. It also drives the engine's native build. +- **Rust toolchain** ([rustup.rs](https://rustup.rs)) — Perry invokes Cargo to compile the engine's platform crate the first time you build for each target. +- For web builds only: [wasm-pack](https://rustwasm.github.io/wasm-pack/installer/) (`cargo install wasm-pack`). 
+ ## Quick Start ```typescript import { initWindow, windowShouldClose, beginDrawing, - endDrawing, clearBackground, drawText, Colors } from "bloom"; + endDrawing, clearBackground, drawText, Colors } from "@bloomengine/engine"; initWindow(800, 450, "My Game"); @@ -26,7 +48,7 @@ while (!windowShouldClose()) { Use `runGame()` for code that works on both native and web: ```typescript -import { initWindow, runGame, clearBackground, drawText, Colors } from "bloom"; +import { initWindow, runGame, clearBackground, drawText, Colors } from "@bloomengine/engine"; initWindow(800, 450, "My Game"); @@ -55,14 +77,14 @@ cd dist/web && python3 -m http.server 8080 | Module | Import | Description | |--------|--------|-------------| -| **Core** | `bloom/core` | Window, game loop, input, timing | -| **Shapes** | `bloom/shapes` | 2D drawing + collision detection | -| **Textures** | `bloom/textures` | Image loading, sprite batching | -| **Text** | `bloom/text` | TTF/OTF font loading and rendering | -| **Audio** | `bloom/audio` | Sound effects + music streaming | -| **Models** | `bloom/models` | 3D model loading (glTF, OBJ), skeletal animation | -| **Math** | `bloom/math` | Vectors, matrices, quaternions, easing | -| **Physics** | `bloom/physics` | Jolt-backed rigid + soft bodies, character, vehicles ([docs](docs/physics.md)) | +| **Core** | `@bloomengine/engine/core` | Window, game loop, input, timing | +| **Shapes** | `@bloomengine/engine/shapes` | 2D drawing + collision detection | +| **Textures** | `@bloomengine/engine/textures` | Image loading, sprite batching | +| **Text** | `@bloomengine/engine/text` | TTF/OTF font loading and rendering | +| **Audio** | `@bloomengine/engine/audio` | Sound effects + music streaming | +| **Models** | `@bloomengine/engine/models` | 3D model loading (glTF, OBJ), skeletal animation | +| **Math** | `@bloomengine/engine/math` | Vectors, matrices, quaternions, easing | +| **Physics** | `@bloomengine/engine/physics` | Jolt-backed rigid + soft bodies, 
character, vehicles ([docs](docs/physics.md)) | ## Platforms @@ -143,7 +165,7 @@ Bloom supports GPU-accelerated skeletal animation via glTF/GLB models. The pipel ```typescript import { loadModel, loadModelAnimation, updateModelAnimation, drawModel, - getTime, Colors } from "bloom"; + getTime, Colors } from "@bloomengine/engine"; const character = loadModel("assets/models/character.glb"); const anim = loadModelAnimation("assets/models/character.glb"); diff --git a/docs/skeletal-animation.md b/docs/skeletal-animation.md index f9c15f4..57d4d7d 100644 --- a/docs/skeletal-animation.md +++ b/docs/skeletal-animation.md @@ -136,7 +136,7 @@ Joint matrices are written to the GPU in `end_frame()` via `flush_joint_matrices ### Loading ```typescript -import { loadModel, loadModelAnimation, drawModel, updateModelAnimation } from "bloom"; +import { loadModel, loadModelAnimation, drawModel, updateModelAnimation } from "@bloomengine/engine"; // Load the mesh (vertices with skin data: JOINTS_0 + WEIGHTS_0) const model = loadModel("assets/models/character.glb"); @@ -179,7 +179,7 @@ drawModel(model, { x: playerX, y: playerY, z: playerZ }, 1.0, WHITE); ```typescript import { initWindow, windowShouldClose, beginDrawing, endDrawing, clearBackground, loadModel, loadModelAnimation, - updateModelAnimation, drawModel, getTime, Colors } from "bloom"; + updateModelAnimation, drawModel, getTime, Colors } from "@bloomengine/engine"; initWindow(800, 600, "Animation Demo"); diff --git a/docs/web-target.md b/docs/web-target.md index 76719a4..b92c738 100644 --- a/docs/web-target.md +++ b/docs/web-target.md @@ -54,7 +54,7 @@ python3 -m http.server 8080 Browsers cannot run blocking `while` loops. 
Use `runGame()` instead: ```typescript -import { initWindow, runGame, clearBackground, drawRect, Colors } from "bloom"; +import { initWindow, runGame, clearBackground, drawRect, Colors } from "@bloomengine/engine"; initWindow(800, 600, "My Game"); @@ -113,7 +113,7 @@ if (fileExists("save.json")) { ## Platform Detection ```typescript -import { getPlatform, Platform } from "bloom"; +import { getPlatform, Platform } from "@bloomengine/engine"; if (getPlatform() === Platform.WEB) { // web-specific code From e3aef89432c7831a3d8622e8fe43ef42b0bddb46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 16 May 2026 11:15:07 +0200 Subject: [PATCH 5/6] ci(release): publish @bloomengine/engine to npm on tag release Adds a publish-npm job to the existing tag-driven release workflow. Runs after github-release so a failed release doesn't leave a package-but-no-release state, and after the await-tests gate so we never ship a tag that didn't pass CI. Idempotent: re-checks npm before publishing and skips cleanly if the version already exists (so workflow_dispatch on an old tag won't double-publish). Checks out submodules recursively because the prepack hook refuses to ship without JoltPhysics sources on disk, and uses --provenance for the npm attestation badge. Requires an NPM_TOKEN repo secret with publish rights on the @bloomengine scope. --- .github/workflows/release.yml | 76 ++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 68b32a6..436b87b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,11 +2,15 @@ name: Release # Fires on a version tag push (e.g. `v0.3.2`). Gates on the Tests workflow # passing for the exact same commit, then creates/updates the GitHub Release -# and (once wired up) would publish to npm. +# and publishes the package to npm as @bloomengine/engine. 
# # The /release Claude Code skill in .claude/skills/release/ drives this end # to end: it bumps the version in package.json, commits, tags, pushes, and # waits for this workflow to go green. +# +# Required secrets: +# NPM_TOKEN — automation token for the @bloomengine npm scope with +# "Publish" permission on @bloomengine/engine. on: push: @@ -147,3 +151,73 @@ jobs: else echo "OK package.json version matches tag ($VERSION)" fi + + # --------------------------------------------------------------------------- + # Publish the package to npm as @bloomengine/engine. Runs after the GitHub + # Release so a failed release doesn't leave a dangling package-but-no-release + # state. Skips cleanly if the version is already on the registry, which + # keeps re-runs idempotent (workflow_dispatch on an existing tag won't + # double-publish or fail). + # + # We check out submodules recursively because scripts/prepack.sh refuses + # to ship a tarball without the JoltPhysics sources materialised — the + # package vendors them rather than relying on a postinstall git clone. 
+ # --------------------------------------------------------------------------- + publish-npm: + needs: github-release + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # required for npm provenance attestations + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - uses: actions/setup-node@v5 + with: + node-version: "24" + registry-url: "https://registry.npmjs.org" + + - name: Resolve tag + id: tag + env: + DISPATCH_TAG: ${{ github.event.inputs.tag }} + run: | + if [ -n "$DISPATCH_TAG" ]; then + TAG="$DISPATCH_TAG" + else + TAG="${GITHUB_REF#refs/tags/}" + fi + echo "tag=$TAG" >> "$GITHUB_OUTPUT" + echo "version=${TAG#v}" >> "$GITHUB_OUTPUT" + + - name: Verify package.json version matches tag + env: + VERSION: ${{ steps.tag.outputs.version }} + TAG: ${{ steps.tag.outputs.tag }} + run: | + PKG_VERSION=$(node -p "require('./package.json').version") + if [ "$PKG_VERSION" != "$VERSION" ]; then + echo "::error::Tag $TAG ($VERSION) does not match package.json ($PKG_VERSION) — refusing to publish." + exit 1 + fi + + - name: Check if version already published + id: check + run: | + PKG_NAME=$(node -p "require('./package.json').name") + PKG_VERSION=$(node -p "require('./package.json').version") + if npm view "$PKG_NAME@$PKG_VERSION" version >/dev/null 2>&1; then + echo "$PKG_NAME@$PKG_VERSION is already on the registry — skipping publish." + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "$PKG_NAME@$PKG_VERSION not yet published — will publish." 
+ echo "skip=false" >> "$GITHUB_OUTPUT" + fi + + - name: Publish to npm + if: steps.check.outputs.skip == 'false' + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: npm publish --provenance --access public From ed90787d101b3d3a3d2536bde600a51cdd2e7a30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sat, 16 May 2026 11:17:15 +0200 Subject: [PATCH 6/6] ci(release): drop NPM_TOKEN, use npm trusted publishing via OIDC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The package is now configured on npmjs.com with this workflow as a trusted publisher, so `id-token: write` is sufficient — npm publish exchanges the GitHub OIDC token for a short-lived credential. No long-lived NPM_TOKEN secret to rotate or leak, and provenance attestation is automatic. --- .github/workflows/release.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 436b87b..97a69da 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,9 +8,12 @@ name: Release # to end: it bumps the version in package.json, commits, tags, pushes, and # waits for this workflow to go green. # -# Required secrets: -# NPM_TOKEN — automation token for the @bloomengine npm scope with -# "Publish" permission on @bloomengine/engine. +# Authentication: npm trusted publishing. The @bloomengine/engine package is +# configured on npmjs.com with this workflow (Bloom-Engine/engine → +# .github/workflows/release.yml → job publish-npm) as a trusted publisher. +# `id-token: write` on the publish job is enough — `npm publish` exchanges +# the GitHub OIDC token for a short-lived publish credential, no NPM_TOKEN +# secret needed. Provenance attestation is automatic under this flow. 
on: push: @@ -218,6 +221,4 @@ jobs: - name: Publish to npm if: steps.check.outputs.skip == 'false' - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} run: npm publish --provenance --access public