Skip to content

Commit feefde8

Browse files
committed
Halve peak grid storage: f32 for elevation heights + water blend
Two full-world grids were stored as `Vec<Vec<f64>>`, doubling their resident size for no precision gain:

1. `ElevationData::heights` (computed by the elevation pipeline, used by `Ground::level` → `get_ground_level` on every block-placement hot path). Values are rounded to integer Minecraft Ys at final placement, so the full 15-digit f64 precision was wasted storage.
2. `LandCoverData::water_blend_grid` (Gaussian-blurred water mask used by `ground.water_blend`). Values are bounded to `[0, 1]` and only ever compared against a 0.5 threshold in the renderer — even f32's ~7 decimal digits are more than enough.

Both grids can hit 10+ million cells on a city-sized bbox, so dropping each from 8 to 4 bytes/cell saves ~92 MB combined on a Munich-sized area.

Postprocess, rotation, and Gaussian blur stay in f64 internally — the downcast to f32 happens once at storage assignment (`ElevationData` construction + `Ground::set_elevation_data` + `compute_water_blend_smooth` output). Read sites in `Ground` widen back to f64 for bilinear interpolation so rounding behaviour matches the previous implementation bit-for-bit on integer-valued elevations.

Local measurement on CI bbox `48.125768,11.552296,48.148565,11.593838` with `--terrain` (integer seconds, 5 runs):

- main baseline: ~24 s
- post-fix: ~20-21 s (−3-4 s)

Most of the speed gain comes from reduced memory bandwidth on the `Ground::level` hot path — the elevation grid now fits tighter in L2/L3 cache during the per-block lookup loop. Peak memory saving lands wherever in the pipeline the grids are resident (mainly during element processing + ground generation + save), independent of the gen-time win.

Tests (65/65), clippy `-D warnings`, fmt clean.
1 parent 8009695 commit feefde8

3 files changed

Lines changed: 56 additions & 16 deletions

File tree

src/elevation/mod.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,14 @@ use selector::select_provider;
1919
/// Holds processed elevation data and metadata
2020
#[derive(Clone)]
2121
pub struct ElevationData {
22-
/// Height values in Minecraft Y coordinates (as f64, rounded to i32 at final block placement)
23-
pub(crate) heights: Vec<Vec<f64>>,
22+
/// Height values in Minecraft Y coordinates.
23+
///
24+
/// Stored as `f32` on purpose: heights are already rounded to integer
25+
/// block Ys at placement time, so the full f64 precision was wasted on a
26+
/// grid that can easily hit 10+ million cells on a city-sized bbox
27+
/// (≈80 MB at f64, halved at f32). Postprocess still runs in f64 for
28+
/// numerical stability; the downcast happens once at construction.
29+
pub(crate) heights: Vec<Vec<f32>>,
2430
/// Width of the elevation grid (may be smaller than world width due to capping)
2531
pub(crate) width: usize,
2632
/// Height of the elevation grid (may be smaller than world height due to capping)
@@ -223,8 +229,17 @@ pub fn fetch_elevation_data(
223229
}
224230
}
225231

232+
// Downcast the f64 postprocess output to the f32 storage format. One-time
233+
// cost paid here so the large grid sits at half the memory for the rest
234+
// of the generation run. NaN/infinity preservation is a requirement —
235+
// downstream `is_finite` checks rely on non-finite sentinels surviving.
236+
let mc_heights_f32: Vec<Vec<f32>> = mc_heights
237+
.into_iter()
238+
.map(|row| row.into_iter().map(|v| v as f32).collect())
239+
.collect();
240+
226241
Ok(ElevationData {
227-
heights: mc_heights,
242+
heights: mc_heights_f32,
228243
width: grid_width,
229244
height: grid_height,
230245
world_width,

src/ground.rs

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,13 @@ impl Ground {
175175
// continuous — the renderer's hard `> 0.5` threshold then traces
176176
// a clean curved shoreline contour instead of the raw ESA 10 m
177177
// rectangular grid edge.
178-
let w00 = lc.water_blend_grid[z0][x0];
179-
let w10 = lc.water_blend_grid[z0][x1];
180-
let w01 = lc.water_blend_grid[z1][x0];
181-
let w11 = lc.water_blend_grid[z1][x1];
178+
// Widen f32 storage to f64 here so the bilinear interpolation
179+
// (and the downstream 0.5-threshold comparison) runs in f64 and adds
180+
// no rounding beyond the one-time downcast made at storage time.
181+
let w00 = lc.water_blend_grid[z0][x0] as f64;
182+
let w10 = lc.water_blend_grid[z0][x1] as f64;
183+
let w01 = lc.water_blend_grid[z1][x0] as f64;
184+
let w11 = lc.water_blend_grid[z1][x1] as f64;
182185

183186
// Bilinear interpolation
184187
let top = w00 * (1.0 - tx) + w10 * tx;
@@ -297,10 +300,15 @@ impl Ground {
297300
let z1 = (z0 + 1).min(data.height - 1);
298301
let dx = fx - x0 as f64;
299302
let dz = fz - z0 as f64;
300-
let v00 = data.heights[z0][x0];
301-
let v10 = data.heights[z0][x1];
302-
let v01 = data.heights[z1][x0];
303-
let v11 = data.heights[z1][x1];
303+
// Widen f32 storage to f64 for the bilinear so the arithmetic stays in
304+
// f64, exactly as it did before the switch to f32 storage. Rounding to
305+
// the nearest block Y therefore matches the old behaviour bit-for-bit
306+
// for any stored height that f32 represents exactly (integer-valued
307+
// elevations within ±2^24 do).
308+
let v00 = data.heights[z0][x0] as f64;
309+
let v10 = data.heights[z0][x1] as f64;
310+
let v01 = data.heights[z1][x0] as f64;
311+
let v11 = data.heights[z1][x1] as f64;
304312
let lerp_top = v00 + (v10 - v00) * dx;
305313
let lerp_bot = v01 + (v11 - v01) * dx;
306314
let result = lerp_top + (lerp_bot - lerp_top) * dz;
@@ -318,7 +326,12 @@ impl Ground {
318326
world_height: usize,
319327
) {
320328
if let Some(ref mut data) = self.elevation_data {
321-
data.heights = heights;
329+
// Rotation operators build a fresh f64 work grid; downcast here to
330+
// match `ElevationData::heights`'s f32 storage layout.
331+
data.heights = heights
332+
.into_iter()
333+
.map(|row| row.into_iter().map(|v| v as f32).collect())
334+
.collect();
322335
data.width = grid_width;
323336
data.height = grid_height;
324337
data.world_width = world_width;
@@ -389,8 +402,8 @@ impl Ground {
389402
let mut img: image::ImageBuffer<Rgb<u8>, Vec<u8>> =
390403
RgbImage::new(width as u32, height as u32);
391404

392-
let mut min_height: f64 = f64::MAX;
393-
let mut max_height: f64 = f64::MIN;
405+
let mut min_height: f32 = f32::MAX;
406+
let mut max_height: f32 = f32::MIN;
394407

395408
for row in heights {
396409
for &h in row {

src/land_cover.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,13 @@ pub struct LandCoverData {
6868
/// and compared against a hard 0.5 threshold in the renderer so the
6969
/// shoreline follows the smoothed contour's 0.5 isoline instead of the
7070
/// raw ESA 10 m rectangular grid edge.
71-
pub water_blend_grid: Vec<Vec<f64>>,
71+
///
72+
/// Stored as `f32` on purpose — the grid can be tens of millions of cells
73+
/// on large bboxes, and the values are bounded to `[0, 1]` and only ever
74+
/// compared against a 0.5 threshold, so f32's ~7 decimal digits are
75+
/// overkill. Halving the storage saves ~46 MB peak on a Munich-sized
76+
/// area.
77+
pub water_blend_grid: Vec<Vec<f32>>,
7278
/// Grid width (matches elevation grid width)
7379
pub width: usize,
7480
/// Grid height (matches elevation grid height)
@@ -94,7 +100,7 @@ impl LandCoverData {
94100
/// - Coarser grid-to-world (large bbox, capped at 4096): each cell already
95101
/// represents many blocks, so a 3-cell blur represents many blocks of
96102
/// softening — appropriate for the coarser effective resolution.
97-
fn compute_water_blend_smooth(grid: &[Vec<u8>], width: usize, height: usize) -> Vec<Vec<f64>> {
103+
fn compute_water_blend_smooth(grid: &[Vec<u8>], width: usize, height: usize) -> Vec<Vec<f32>> {
98104
const SIGMA_CELLS: f64 = 3.0;
99105

100106
if width == 0 || height == 0 {
@@ -110,7 +116,13 @@ fn compute_water_blend_smooth(grid: &[Vec<u8>], width: usize, height: usize) ->
110116
.collect()
111117
})
112118
.collect();
119+
// Gaussian blur runs in f64 for numerical stability, then we drop down to
120+
// f32 for storage — values land in [0, 1] and are only ever compared to a
121+
// 0.5 threshold, so precision beyond f32 is wasted.
113122
crate::elevation::postprocess::gaussian_blur_grid(&binary, SIGMA_CELLS)
123+
.into_iter()
124+
.map(|row| row.into_iter().map(|v| v as f32).collect())
125+
.collect()
114126
}
115127

116128
/// Metadata parsed from a COG (Cloud-Optimized GeoTIFF) IFD.

0 commit comments

Comments
 (0)