Skip to main content

spatialrust_filtering/
voxel.rs

1use std::collections::HashMap;
2use std::hash::{BuildHasherDefault, Hasher};
3
4use spatialrust_core::{
5    DType, DeviceKind, ExecutionPolicy, FieldSemantic, HasPositions3, PointBuffer, PointBufferSet,
6    PointCloud, PointField, PointSchema, SpatialError, SpatialResult,
7};
8use spatialrust_math::Vec3;
9
10use crate::filter::PointCloudFilter;
11
12/// Voxel keys are small integer tuples, so a fast multiply-rotate hasher (à la
13/// FxHash) beats the default SipHash by a wide margin on the cell map.
14#[derive(Default)]
15struct VoxelKeyHasher {
16    hash: u64,
17}
18
19impl VoxelKeyHasher {
20    #[inline]
21    fn mix(&mut self, value: u64) {
22        const K: u64 = 0x517c_c1b7_2722_0a95;
23        self.hash = (self.hash.rotate_left(5) ^ value).wrapping_mul(K);
24    }
25}
26
27impl Hasher for VoxelKeyHasher {
28    #[inline]
29    fn finish(&self) -> u64 {
30        self.hash
31    }
32
33    #[inline]
34    fn write_i64(&mut self, i: i64) {
35        self.mix(i as u64);
36    }
37
38    #[inline]
39    fn write_u64(&mut self, i: u64) {
40        self.mix(i);
41    }
42
43    #[inline]
44    fn write_u32(&mut self, i: u32) {
45        self.mix(u64::from(i));
46    }
47
48    #[inline]
49    fn write_i32(&mut self, i: i32) {
50        self.mix(i as u64);
51    }
52
53    fn write(&mut self, bytes: &[u8]) {
54        for &b in bytes {
55            self.mix(u64::from(b));
56        }
57    }
58}
59
60/// Cell map keyed by integer voxel coordinates, using the fast voxel hasher.
61type VoxelCellMap = HashMap<(i64, i64, i64), VoxelCell, BuildHasherDefault<VoxelKeyHasher>>;
62type XyzVoxelCellMap = HashMap<(i64, i64, i64), usize, BuildHasherDefault<VoxelKeyHasher>>;
63type XyzVoxelCellMapU32 = HashMap<(u32, u32, u32), usize, BuildHasherDefault<VoxelKeyHasher>>;
64
65/// Voxel aggregation strategy.
66#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
67pub enum VoxelAggregationMode {
68    /// Average all points in each voxel (centroid).
69    #[default]
70    Centroid,
71    /// Keep the first point that falls into each voxel.
72    ApproximateFirst,
73}
74
75/// Attribute aggregation policy for non-position fields.
76#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
77pub enum AttributeAggregation {
78    /// Average numeric attributes within a voxel.
79    #[default]
80    Average,
81    /// Keep the first point's attribute values.
82    First,
83}
84
85/// Default minimum point count before GPU voxel downsampling is selected (centroid).
86///
87/// Local end-to-end filter benches show GPU centroid wins above ~500k points.
88pub const DEFAULT_GPU_MIN_POINTS: usize = 500_000;
89
90/// Default minimum point count before GPU approximate-first downsampling is selected.
91///
92/// Approximate-first pays a higher gather/readback cost than centroid. End-to-end
93/// benches through 1M still favor CPU (~45 ms vs ~50 ms at 1M); auto-GPU is deferred
94/// until a crossover is measured above that range.
95pub const DEFAULT_GPU_MIN_POINTS_APPROXIMATE: usize = 2_000_000;
96
97/// Non-position F32 attribute count at/above which approximate-first Auto uses a higher GPU threshold.
98///
99/// Epic 38: `point_xyzinormal` approximate-first GPU lost at all measured scales.
100/// Epic 46: upload pool + zero-copy attrs restored GPU crossover at 1M+.
101pub const APPROXIMATE_HEAVY_F32_ATTRIBUTE_CHANNELS: usize = 4;
102
103/// Auto GPU threshold for approximate-first on attribute-heavy schemas (e.g. xyzinormal).
104pub const DEFAULT_GPU_MIN_POINTS_APPROXIMATE_HEAVY: usize = 1_000_000;
105
106/// Configuration for voxel-grid downsampling.
107#[derive(Clone, Copy, Debug, PartialEq)]
108pub struct VoxelGridDownsampleConfig {
109    /// Voxel edge length in meters.
110    pub leaf_size: f32,
111    /// Optional grid origin. Defaults to the cloud minimum corner.
112    pub origin: Option<Vec3<f32>>,
113    /// Position aggregation mode.
114    pub mode: VoxelAggregationMode,
115    /// Aggregation policy for other fields.
116    pub attribute_policy: AttributeAggregation,
117    /// Minimum input point count before GPU execution is considered worthwhile.
118    ///
119    /// `None` always uses GPU when requested. Defaults follow local bench results:
120    /// centroid ~500k, approximate-first ~2M (1M end-to-end still CPU-favored).
121    ///
122    /// Approximate-first Auto also consults the input schema: clouds with
123    /// `APPROXIMATE_HEAVY_F32_ATTRIBUTE_CHANNELS` or more non-position F32 fields
124    /// (e.g. `point_xyzinormal`) use `DEFAULT_GPU_MIN_POINTS_APPROXIMATE_HEAVY`.
125    pub gpu_min_points: Option<usize>,
126}
127
128impl VoxelGridDownsampleConfig {
129    /// Creates a centroid downsampling config with uniform leaf size.
130    #[must_use]
131    pub fn centroid(leaf_size: f32) -> Self {
132        Self {
133            leaf_size,
134            origin: None,
135            mode: VoxelAggregationMode::Centroid,
136            attribute_policy: AttributeAggregation::Average,
137            gpu_min_points: Some(DEFAULT_GPU_MIN_POINTS),
138        }
139    }
140
141    /// Creates an approximate first-point downsampling config.
142    #[must_use]
143    pub fn approximate(leaf_size: f32) -> Self {
144        Self {
145            leaf_size,
146            origin: None,
147            mode: VoxelAggregationMode::ApproximateFirst,
148            attribute_policy: AttributeAggregation::First,
149            gpu_min_points: Some(DEFAULT_GPU_MIN_POINTS_APPROXIMATE),
150        }
151    }
152
153    /// Disables the GPU point-count heuristic so GPU is always used when requested.
154    #[must_use]
155    pub const fn without_gpu_min_points(mut self) -> Self {
156        self.gpu_min_points = None;
157        self
158    }
159
160    /// Returns the point-count threshold used by [`ExecutionPolicy::Auto`].
161    ///
162    /// Approximate-first mode raises the effective threshold to
163    /// `DEFAULT_GPU_MIN_POINTS_APPROXIMATE_HEAVY` when the schema carries many F32
164    /// attributes (Epic 38 regression, Epic 46 crossover at 1M+).
165    #[must_use]
166    pub fn effective_gpu_min_points(&self, schema: &PointSchema) -> Option<usize> {
167        let base = self.gpu_min_points?;
168        if self.mode != VoxelAggregationMode::ApproximateFirst {
169            return Some(base);
170        }
171        if count_non_position_f32_fields(schema) >= APPROXIMATE_HEAVY_F32_ATTRIBUTE_CHANNELS {
172            return Some(DEFAULT_GPU_MIN_POINTS_APPROXIMATE_HEAVY);
173        }
174        Some(base)
175    }
176}
177
178/// Voxel-grid downsampling filter.
179#[derive(Clone, Copy, Debug, PartialEq)]
180pub struct VoxelGridDownsample {
181    config: VoxelGridDownsampleConfig,
182}
183
184impl VoxelGridDownsample {
185    /// Creates a filter from config.
186    #[must_use]
187    pub const fn new(config: VoxelGridDownsampleConfig) -> Self {
188        Self { config }
189    }
190
191    /// Returns the filter config.
192    #[must_use]
193    pub const fn config(&self) -> VoxelGridDownsampleConfig {
194        self.config
195    }
196
197    /// Applies the filter using the requested execution policy.
198    ///
199    /// GPU execution assigns voxel keys on wgpu, builds segments with GPU sorting,
200    /// and performs centroid or approximate-first aggregation on wgpu.
201    ///
202    /// [`ExecutionPolicy::Auto`] picks GPU only when the input meets
203    /// [`VoxelGridDownsampleConfig::gpu_min_points`]. Explicit GPU requests below the
204    /// threshold fall back to CPU to avoid upload/readback overhead on small clouds.
205    pub fn filter_with_policy(
206        &self,
207        input: &PointCloud,
208        policy: ExecutionPolicy,
209    ) -> SpatialResult<PointCloud> {
210        self.filter_internal(input, policy)
211    }
212}
213
214impl PointCloudFilter for VoxelGridDownsample {
215    fn name(&self) -> &'static str {
216        "VoxelGridDownsample"
217    }
218
219    fn filter(&self, input: &PointCloud) -> SpatialResult<PointCloud> {
220        self.filter_internal(input, ExecutionPolicy::CpuSingle)
221    }
222}
223
224impl VoxelGridDownsample {
225    fn filter_internal(
226        &self,
227        input: &PointCloud,
228        policy: ExecutionPolicy,
229    ) -> SpatialResult<PointCloud> {
230        if input.is_empty() {
231            return Ok(input.clone());
232        }
233        if self.config.leaf_size <= 0.0 {
234            return Err(SpatialError::InvalidArgument(
235                "leaf_size must be greater than zero".to_owned(),
236            ));
237        }
238
239        let (x, y, z) = input.positions3()?;
240        let inv_leaf = 1.0 / self.config.leaf_size;
241        let (origin, u32_voxel_keys) = match self.config.origin {
242            Some(origin) => (origin, false),
243            None => {
244                let (min, max) = compute_bounds(x, y, z);
245                (min, fits_u32_voxel_key(min, max, inv_leaf))
246            }
247        };
248        let origin_is_min = self.config.origin.is_none();
249        let policy = self.resolve_policy(input, policy);
250
251        if matches!(policy, ExecutionPolicy::Gpu(DeviceKind::Wgpu)) {
252            #[cfg(feature = "filter-voxel-gpu")]
253            {
254                return match self.config.mode {
255                    VoxelAggregationMode::Centroid => filter_gpu_centroid(
256                        input,
257                        x,
258                        y,
259                        z,
260                        origin,
261                        inv_leaf,
262                        self.config.attribute_policy,
263                    ),
264                    VoxelAggregationMode::ApproximateFirst => filter_gpu_approximate_first(
265                        input,
266                        x,
267                        y,
268                        z,
269                        origin,
270                        inv_leaf,
271                        self.config.attribute_policy,
272                    ),
273                };
274            }
275            #[cfg(not(feature = "filter-voxel-gpu"))]
276            {
277                return Err(SpatialError::InvalidArgument(
278                    "GPU voxel downsampling requires the filter-voxel-gpu feature".to_owned(),
279                ));
280            }
281        }
282
283        // Fast path: the common centroid + average case (the default config and
284        // what PCL's VoxelGrid does) is a single pass that resolves field
285        // buffers once and accumulates per-cell sums into flat arrays, avoiding a
286        // per-cell index Vec and a string-keyed field lookup per point.
287        if matches!(
288            policy,
289            ExecutionPolicy::Auto | ExecutionPolicy::CpuSingle | ExecutionPolicy::CpuParallel
290        ) && self.config.mode == VoxelAggregationMode::Centroid
291            && self.config.attribute_policy == AttributeAggregation::Average
292        {
293            return filter_cpu_centroid_fast(
294                input,
295                x,
296                y,
297                z,
298                origin,
299                inv_leaf,
300                origin_is_min,
301                u32_voxel_keys,
302            );
303        }
304
305        let cells = match policy {
306            ExecutionPolicy::Gpu(DeviceKind::Wgpu) => {
307                build_voxel_cells_gpu(x, y, z, origin, inv_leaf)?
308            }
309            ExecutionPolicy::Gpu(_) => {
310                return Err(SpatialError::InvalidArgument(
311                    "unsupported GPU device kind for voxel downsampling".to_owned(),
312                ));
313            }
314            ExecutionPolicy::Auto | ExecutionPolicy::CpuSingle | ExecutionPolicy::CpuParallel => {
315                build_voxel_cells_cpu(x, y, z, origin, inv_leaf)
316            }
317        };
318
319        let schema = input.schema().clone();
320        let mut buffers = PointBufferSet::new();
321        for field in schema.fields() {
322            buffers
323                .insert(field.name.clone(), PointBuffer::with_capacity(field.dtype, cells.len()));
324        }
325
326        let mut ordered_cells: Vec<_> = cells.into_iter().collect();
327        ordered_cells.sort_by_key(|(key, _)| *key);
328
329        for (_, cell) in ordered_cells {
330            append_voxel_point(
331                input,
332                &mut buffers,
333                schema.fields(),
334                &cell,
335                self.config.mode,
336                self.config.attribute_policy,
337            )?;
338        }
339
340        PointCloud::try_from_parts(schema, buffers, input.metadata().clone())
341    }
342
343    fn resolve_policy(&self, input: &PointCloud, policy: ExecutionPolicy) -> ExecutionPolicy {
344        match policy {
345            ExecutionPolicy::Auto => {
346                if self.should_use_gpu(input) {
347                    ExecutionPolicy::Gpu(DeviceKind::Wgpu)
348                } else {
349                    ExecutionPolicy::CpuSingle
350                }
351            }
352            ExecutionPolicy::Gpu(DeviceKind::Wgpu) if !self.should_use_gpu(input) => {
353                ExecutionPolicy::CpuSingle
354            }
355            other => other,
356        }
357    }
358
359    fn should_use_gpu(&self, input: &PointCloud) -> bool {
360        #[cfg(feature = "filter-voxel-gpu")]
361        {
362            match self.config.effective_gpu_min_points(input.schema()) {
363                Some(min_points) => input.len() >= min_points,
364                None => true,
365            }
366        }
367        #[cfg(not(feature = "filter-voxel-gpu"))]
368        {
369            let _ = input;
370            false
371        }
372    }
373}
374
375fn count_non_position_f32_fields(schema: &PointSchema) -> usize {
376    schema
377        .fields()
378        .iter()
379        .filter(|field| {
380            !matches!(
381                field.semantic,
382                FieldSemantic::PositionX | FieldSemantic::PositionY | FieldSemantic::PositionZ
383            ) && matches!(field.dtype, DType::F32 | DType::F16)
384        })
385        .count()
386}
387
388#[derive(Clone, Debug, Default)]
389struct VoxelCell {
390    indices: Vec<usize>,
391}
392
393/// Single-pass centroid voxel downsampling for the default (Centroid + Average)
394/// case. Resolves every field's buffer once, then accumulates per-cell sums into
395/// flat arrays keyed by a sequential cell id, so there is no per-cell allocation
396/// and no per-point field lookup.
397fn filter_cpu_centroid_fast(
398    input: &PointCloud,
399    x: &[f32],
400    y: &[f32],
401    z: &[f32],
402    origin: Vec3<f32>,
403    inv_leaf: f32,
404    origin_is_min: bool,
405    u32_voxel_keys: bool,
406) -> SpatialResult<PointCloud> {
407    if let Some(output) = filter_cpu_xyz_centroid_fast(
408        input,
409        x,
410        y,
411        z,
412        origin,
413        inv_leaf,
414        origin_is_min,
415        u32_voxel_keys,
416    )? {
417        return Ok(output);
418    }
419
420    let schema = input.schema().clone();
421    let fields = schema.fields();
422    let n_fields = fields.len();
423
424    // Resolve each field's backing buffer once.
425    let field_buffers: Vec<&PointBuffer> =
426        fields.iter().map(|f| input.field(&f.name)).collect::<SpatialResult<_>>()?;
427
428    let mut key_to_id: HashMap<(i64, i64, i64), u32, BuildHasherDefault<VoxelKeyHasher>> =
429        HashMap::default();
430    let mut keys: Vec<(i64, i64, i64)> = Vec::new();
431    let mut counts: Vec<u32> = Vec::new();
432    // Flat `cell * n_fields + field` accumulator of f64 sums.
433    let mut sums: Vec<f64> = Vec::new();
434
435    for i in 0..x.len() {
436        let key = if origin_is_min {
437            voxel_key_nonnegative(x[i], y[i], z[i], origin, inv_leaf)
438        } else {
439            voxel_key(x[i], y[i], z[i], origin, inv_leaf)
440        };
441        let id = *key_to_id.entry(key).or_insert_with(|| {
442            let id = counts.len() as u32;
443            counts.push(0);
444            keys.push(key);
445            sums.extend(std::iter::repeat(0.0).take(n_fields));
446            id
447        }) as usize;
448        counts[id] += 1;
449        let base = id * n_fields;
450        for (fi, buffer) in field_buffers.iter().enumerate() {
451            sums[base + fi] += f64::from(read_buffer_f32(buffer, i));
452        }
453    }
454
455    // Deterministic output: emit cells in voxel-key order.
456    let mut order: Vec<u32> = (0..counts.len() as u32).collect();
457    order.sort_by_key(|&id| keys[id as usize]);
458
459    let mut buffers = PointBufferSet::new();
460    for field in fields {
461        buffers.insert(field.name.clone(), PointBuffer::with_capacity(field.dtype, counts.len()));
462    }
463    for &id in &order {
464        let id = id as usize;
465        let inv_count = 1.0 / f64::from(counts[id]);
466        let base = id * n_fields;
467        for (fi, field) in fields.iter().enumerate() {
468            push_field(&mut buffers, field, (sums[base + fi] * inv_count) as f32)?;
469        }
470    }
471
472    PointCloud::try_from_parts(schema, buffers, input.metadata().clone())
473}
474
475#[derive(Clone, Copy, Debug, Default)]
476struct XyzVoxelCell {
477    sum_x: f32,
478    sum_y: f32,
479    sum_z: f32,
480    count: u32,
481}
482
483fn filter_cpu_xyz_centroid_fast(
484    input: &PointCloud,
485    x: &[f32],
486    y: &[f32],
487    z: &[f32],
488    origin: Vec3<f32>,
489    inv_leaf: f32,
490    origin_is_min: bool,
491    u32_voxel_keys: bool,
492) -> SpatialResult<Option<PointCloud>> {
493    let schema = input.schema();
494    if !is_plain_xyz_f32_schema(schema) {
495        return Ok(None);
496    }
497
498    let expected_cells = (x.len() / 2).clamp(16, 1_048_576);
499    let mut cells = Vec::<XyzVoxelCell>::with_capacity(expected_cells);
500    if u32_voxel_keys {
501        let mut key_to_id = XyzVoxelCellMapU32::with_capacity_and_hasher(
502            expected_cells,
503            BuildHasherDefault::<VoxelKeyHasher>::default(),
504        );
505        for i in 0..x.len() {
506            let key = voxel_key_nonnegative_u32(x[i], y[i], z[i], origin, inv_leaf);
507            let id = xyz_cell_id_u32(&mut key_to_id, &mut cells, key);
508            let cell = &mut cells[id];
509            cell.sum_x += x[i];
510            cell.sum_y += y[i];
511            cell.sum_z += z[i];
512            cell.count += 1;
513        }
514    } else if origin_is_min {
515        let mut key_to_id = XyzVoxelCellMap::with_capacity_and_hasher(
516            expected_cells,
517            BuildHasherDefault::<VoxelKeyHasher>::default(),
518        );
519        for i in 0..x.len() {
520            let key = voxel_key_nonnegative(x[i], y[i], z[i], origin, inv_leaf);
521            let id = xyz_cell_id(&mut key_to_id, &mut cells, key);
522            let cell = &mut cells[id];
523            cell.sum_x += x[i];
524            cell.sum_y += y[i];
525            cell.sum_z += z[i];
526            cell.count += 1;
527        }
528    } else {
529        let mut key_to_id = XyzVoxelCellMap::with_capacity_and_hasher(
530            expected_cells,
531            BuildHasherDefault::<VoxelKeyHasher>::default(),
532        );
533        for i in 0..x.len() {
534            let key = voxel_key(x[i], y[i], z[i], origin, inv_leaf);
535            let id = xyz_cell_id(&mut key_to_id, &mut cells, key);
536            let cell = &mut cells[id];
537            cell.sum_x += x[i];
538            cell.sum_y += y[i];
539            cell.sum_z += z[i];
540            cell.count += 1;
541        }
542    }
543
544    let mut out_x = Vec::with_capacity(cells.len());
545    let mut out_y = Vec::with_capacity(cells.len());
546    let mut out_z = Vec::with_capacity(cells.len());
547    for cell in cells {
548        let inv_count = 1.0 / cell.count as f32;
549        out_x.push(cell.sum_x * inv_count);
550        out_y.push(cell.sum_y * inv_count);
551        out_z.push(cell.sum_z * inv_count);
552    }
553
554    let mut out_x = Some(out_x);
555    let mut out_y = Some(out_y);
556    let mut out_z = Some(out_z);
557    let mut buffers = PointBufferSet::new();
558    for field in schema.fields() {
559        let values = match field.semantic {
560            FieldSemantic::PositionX => out_x.take().expect("x emitted once"),
561            FieldSemantic::PositionY => out_y.take().expect("y emitted once"),
562            FieldSemantic::PositionZ => out_z.take().expect("z emitted once"),
563            _ => return Ok(None),
564        };
565        buffers.insert(field.name.clone(), PointBuffer::from_f32(values));
566    }
567
568    PointCloud::try_from_parts(schema.clone(), buffers, input.metadata().clone()).map(Some)
569}
570
571fn xyz_cell_id(
572    key_to_id: &mut XyzVoxelCellMap,
573    cells: &mut Vec<XyzVoxelCell>,
574    key: (i64, i64, i64),
575) -> usize {
576    match key_to_id.entry(key) {
577        std::collections::hash_map::Entry::Occupied(entry) => *entry.get(),
578        std::collections::hash_map::Entry::Vacant(entry) => {
579            let id = cells.len();
580            cells.push(XyzVoxelCell::default());
581            entry.insert(id);
582            id
583        }
584    }
585}
586
587fn xyz_cell_id_u32(
588    key_to_id: &mut XyzVoxelCellMapU32,
589    cells: &mut Vec<XyzVoxelCell>,
590    key: (u32, u32, u32),
591) -> usize {
592    match key_to_id.entry(key) {
593        std::collections::hash_map::Entry::Occupied(entry) => *entry.get(),
594        std::collections::hash_map::Entry::Vacant(entry) => {
595            let id = cells.len();
596            cells.push(XyzVoxelCell::default());
597            entry.insert(id);
598            id
599        }
600    }
601}
602
603fn is_plain_xyz_f32_schema(schema: &PointSchema) -> bool {
604    if schema.fields().len() != 3 {
605        return false;
606    }
607    let mut seen_x = false;
608    let mut seen_y = false;
609    let mut seen_z = false;
610    for field in schema.fields() {
611        if !matches!(field.dtype, DType::F32 | DType::F16) {
612            return false;
613        }
614        match field.semantic {
615            FieldSemantic::PositionX => seen_x = true,
616            FieldSemantic::PositionY => seen_y = true,
617            FieldSemantic::PositionZ => seen_z = true,
618            _ => return false,
619        }
620    }
621    seen_x && seen_y && seen_z
622}
623
624/// Reads any numeric buffer column as `f32` by index.
625fn read_buffer_f32(buffer: &PointBuffer, index: usize) -> f32 {
626    match buffer {
627        PointBuffer::F32(v) => v[index],
628        PointBuffer::F64(v) => v[index] as f32,
629        PointBuffer::U8(v) => f32::from(v[index]),
630        PointBuffer::U16(v) => f32::from(v[index]),
631        PointBuffer::U32(v) => v[index] as f32,
632        PointBuffer::I32(v) => v[index] as f32,
633    }
634}
635
636fn build_voxel_cells_cpu(
637    x: &[f32],
638    y: &[f32],
639    z: &[f32],
640    origin: Vec3<f32>,
641    inv_leaf: f32,
642) -> VoxelCellMap {
643    let mut cells = VoxelCellMap::default();
644    for index in 0..x.len() {
645        let key = voxel_key(x[index], y[index], z[index], origin, inv_leaf);
646        cells.entry(key).or_default().indices.push(index);
647    }
648    cells
649}
650
651#[cfg(feature = "filter-voxel-gpu")]
652fn build_voxel_cells_gpu(
653    x: &[f32],
654    y: &[f32],
655    z: &[f32],
656    origin: Vec3<f32>,
657    inv_leaf: f32,
658) -> SpatialResult<VoxelCellMap> {
659    use spatialrust_gpu::{compute_voxel_keys, WgpuRuntime};
660
661    let runtime = WgpuRuntime::shared()?;
662    let keys = compute_voxel_keys(&runtime, x, y, z, [origin.x, origin.y, origin.z], inv_leaf)?;
663
664    let mut cells = VoxelCellMap::default();
665    for (index, key) in keys.into_iter().enumerate() {
666        cells.entry(key).or_default().indices.push(index);
667    }
668    Ok(cells)
669}
670
671#[cfg(feature = "filter-voxel-gpu")]
672fn gpu_non_position_fields(schema: &PointSchema) -> Vec<PointField> {
673    schema
674        .fields()
675        .iter()
676        .filter(|field| {
677            !matches!(
678                field.semantic,
679                FieldSemantic::PositionX | FieldSemantic::PositionY | FieldSemantic::PositionZ
680            )
681        })
682        .cloned()
683        .collect()
684}
685
686#[cfg(feature = "filter-voxel-gpu")]
687fn partition_gpu_attribute_fields(fields: &[PointField]) -> (Vec<PointField>, Vec<PointField>) {
688    let mut f32_fields = Vec::new();
689    let mut u8_fields = Vec::new();
690    for field in fields {
691        if field.dtype == DType::U8 {
692            u8_fields.push(field.clone());
693        } else {
694            f32_fields.push(field.clone());
695        }
696    }
697    (f32_fields, u8_fields)
698}
699
700#[cfg(feature = "filter-voxel-gpu")]
701fn collect_attribute_f32_sources(
702    input: &PointCloud,
703    fields: &[PointField],
704) -> SpatialResult<Vec<Vec<f32>>> {
705    let mut sources = Vec::with_capacity(fields.len());
706    for field in fields {
707        let mut values = Vec::with_capacity(input.len());
708        for index in 0..input.len() {
709            values.push(read_field_f32(input, field, index)?);
710        }
711        sources.push(values);
712    }
713    Ok(sources)
714}
715
716#[cfg(feature = "filter-voxel-gpu")]
717fn borrow_attribute_f32_channels<'a>(
718    input: &'a PointCloud,
719    fields: &[PointField],
720) -> SpatialResult<Option<Vec<&'a [f32]>>> {
721    let mut channels = Vec::with_capacity(fields.len());
722    for field in fields {
723        if !matches!(field.dtype, DType::F32 | DType::F16) {
724            return Ok(None);
725        }
726        channels.push(input.field(&field.name)?.as_f32()?);
727    }
728    Ok(Some(channels))
729}
730
731#[cfg(feature = "filter-voxel-gpu")]
732fn collect_attribute_u8_sources(
733    input: &PointCloud,
734    fields: &[PointField],
735) -> SpatialResult<Vec<Vec<u8>>> {
736    let mut sources = Vec::with_capacity(fields.len());
737    for field in fields {
738        let buffer = input.field(&field.name)?;
739        let PointBuffer::U8(values) = buffer else {
740            return Err(SpatialError::UnsupportedDType(field.dtype));
741        };
742        sources.push(values.to_vec());
743    }
744    Ok(sources)
745}
746
747#[cfg(feature = "filter-voxel-gpu")]
748fn assemble_gpu_voxel_output(
749    input: &PointCloud,
750    out_x: Vec<f32>,
751    out_y: Vec<f32>,
752    out_z: Vec<f32>,
753    f32_attribute_fields: &[PointField],
754    f32_attribute_values: Vec<Vec<f32>>,
755    u8_attribute_fields: &[PointField],
756    u8_attribute_values: Vec<Vec<u8>>,
757) -> SpatialResult<PointCloud> {
758    let schema = input.schema().clone();
759    let mut buffers = PointBufferSet::new();
760
761    let x_field = schema
762        .find_semantic(FieldSemantic::PositionX)
763        .ok_or_else(|| SpatialError::MissingField("x".to_owned()))?;
764    let y_field = schema
765        .find_semantic(FieldSemantic::PositionY)
766        .ok_or_else(|| SpatialError::MissingField("y".to_owned()))?;
767    let z_field = schema
768        .find_semantic(FieldSemantic::PositionZ)
769        .ok_or_else(|| SpatialError::MissingField("z".to_owned()))?;
770
771    set_field_from_f32(&mut buffers, x_field, out_x)?;
772    set_field_from_f32(&mut buffers, y_field, out_y)?;
773    set_field_from_f32(&mut buffers, z_field, out_z)?;
774
775    for (field, values) in f32_attribute_fields.iter().zip(f32_attribute_values) {
776        set_field_from_f32(&mut buffers, field, values)?;
777    }
778    for (field, values) in u8_attribute_fields.iter().zip(u8_attribute_values) {
779        set_field_from_u8(&mut buffers, field, values)?;
780    }
781
782    PointCloud::try_from_parts(schema, buffers, input.metadata().clone())
783}
784
785#[cfg(feature = "filter-voxel-gpu")]
786fn filter_gpu_centroid(
787    input: &PointCloud,
788    x: &[f32],
789    y: &[f32],
790    z: &[f32],
791    origin: Vec3<f32>,
792    inv_leaf: f32,
793    attribute_policy: AttributeAggregation,
794) -> SpatialResult<PointCloud> {
795    use spatialrust_gpu::{
796        build_voxel_segments_gpu_from_keys_buffer, compute_voxel_keys_gpu_buffers,
797        downsample_voxel_centroid_gpu, reduce_voxel_centroids_xyz_and_average_multi_gpu,
798        reduce_voxel_centroids_xyz_and_gather_first_multi_gpu, WgpuRuntime,
799    };
800
801    let attribute_fields = gpu_non_position_fields(input.schema());
802    if attribute_fields.is_empty() {
803        let runtime = WgpuRuntime::shared()?;
804        let pipeline = downsample_voxel_centroid_gpu(
805            &runtime,
806            x,
807            y,
808            z,
809            [origin.x, origin.y, origin.z],
810            inv_leaf,
811        )?;
812        return assemble_gpu_voxel_output(
813            input,
814            pipeline.out_x,
815            pipeline.out_y,
816            pipeline.out_z,
817            &[],
818            Vec::new(),
819            &[],
820            Vec::new(),
821        );
822    }
823
824    let (f32_fields, u8_fields) = partition_gpu_attribute_fields(&attribute_fields);
825    let runtime = WgpuRuntime::shared()?;
826    let positions = compute_voxel_keys_gpu_buffers(
827        &runtime,
828        x,
829        y,
830        z,
831        [origin.x, origin.y, origin.z],
832        inv_leaf,
833    )?;
834    let point_count = positions.point_count();
835    let segments = build_voxel_segments_gpu_from_keys_buffer(
836        &runtime,
837        positions.keys_buffer(),
838        point_count,
839        point_count.next_power_of_two(),
840    )?;
841    let owned_f32_sources;
842    let f32_refs: Vec<&[f32]> =
843        if let Some(borrowed) = borrow_attribute_f32_channels(input, &f32_fields)? {
844            borrowed
845        } else {
846            owned_f32_sources = collect_attribute_f32_sources(input, &f32_fields)?;
847            owned_f32_sources.iter().map(Vec::as_slice).collect()
848        };
849    let u8_sources = collect_attribute_u8_sources(input, &u8_fields)?;
850    let u8_refs: Vec<&[u8]> = u8_sources.iter().map(Vec::as_slice).collect();
851    let (out_x, out_y, out_z, f32_values, u8_values) = match attribute_policy {
852        AttributeAggregation::Average => reduce_voxel_centroids_xyz_and_average_multi_gpu(
853            &runtime,
854            positions.x_buffer(),
855            positions.y_buffer(),
856            positions.z_buffer(),
857            &f32_refs,
858            &u8_refs,
859            &segments,
860        )?,
861        AttributeAggregation::First => reduce_voxel_centroids_xyz_and_gather_first_multi_gpu(
862            &runtime,
863            positions.x_buffer(),
864            positions.y_buffer(),
865            positions.z_buffer(),
866            &f32_refs,
867            &u8_refs,
868            &segments,
869        )?,
870    };
871
872    positions.recycle(&runtime);
873
874    assemble_gpu_voxel_output(
875        input,
876        out_x,
877        out_y,
878        out_z,
879        &f32_fields,
880        f32_values,
881        &u8_fields,
882        u8_values,
883    )
884}
885
886#[cfg(feature = "filter-voxel-gpu")]
887fn filter_gpu_approximate_first(
888    input: &PointCloud,
889    x: &[f32],
890    y: &[f32],
891    z: &[f32],
892    origin: Vec3<f32>,
893    inv_leaf: f32,
894    attribute_policy: AttributeAggregation,
895) -> SpatialResult<PointCloud> {
896    use spatialrust_gpu::{
897        build_voxel_segments_gpu_from_keys_buffer, compute_voxel_keys_gpu_buffers,
898        downsample_voxel_approximate_first_gpu, gather_voxel_first_xyz_and_average_multi_gpu,
899        gather_voxel_first_xyz_and_multi_gpu, WgpuRuntime,
900    };
901
902    let attribute_fields = gpu_non_position_fields(input.schema());
903    if attribute_fields.is_empty() {
904        let runtime = WgpuRuntime::shared()?;
905        let pipeline = downsample_voxel_approximate_first_gpu(
906            &runtime,
907            x,
908            y,
909            z,
910            [origin.x, origin.y, origin.z],
911            inv_leaf,
912        )?;
913        return assemble_gpu_voxel_output(
914            input,
915            pipeline.out_x,
916            pipeline.out_y,
917            pipeline.out_z,
918            &[],
919            Vec::new(),
920            &[],
921            Vec::new(),
922        );
923    }
924
925    let (f32_fields, u8_fields) = partition_gpu_attribute_fields(&attribute_fields);
926    let runtime = WgpuRuntime::shared()?;
927    let positions = compute_voxel_keys_gpu_buffers(
928        &runtime,
929        x,
930        y,
931        z,
932        [origin.x, origin.y, origin.z],
933        inv_leaf,
934    )?;
935    let point_count = positions.point_count();
936    let segments = build_voxel_segments_gpu_from_keys_buffer(
937        &runtime,
938        positions.keys_buffer(),
939        point_count,
940        point_count.next_power_of_two(),
941    )?;
942    let owned_f32_sources;
943    let f32_refs: Vec<&[f32]> =
944        if let Some(borrowed) = borrow_attribute_f32_channels(input, &f32_fields)? {
945            borrowed
946        } else {
947            owned_f32_sources = collect_attribute_f32_sources(input, &f32_fields)?;
948            owned_f32_sources.iter().map(Vec::as_slice).collect()
949        };
950    let u8_sources = collect_attribute_u8_sources(input, &u8_fields)?;
951    let u8_refs: Vec<&[u8]> = u8_sources.iter().map(Vec::as_slice).collect();
952    let (out_x, out_y, out_z, f32_values, u8_values) = match attribute_policy {
953        AttributeAggregation::Average => gather_voxel_first_xyz_and_average_multi_gpu(
954            &runtime,
955            positions.x_buffer(),
956            positions.y_buffer(),
957            positions.z_buffer(),
958            &f32_refs,
959            &u8_refs,
960            &segments,
961        )?,
962        AttributeAggregation::First => gather_voxel_first_xyz_and_multi_gpu(
963            &runtime,
964            positions.x_buffer(),
965            positions.y_buffer(),
966            positions.z_buffer(),
967            &f32_refs,
968            &u8_refs,
969            &segments,
970        )?,
971    };
972
973    positions.recycle(&runtime);
974
975    assemble_gpu_voxel_output(
976        input,
977        out_x,
978        out_y,
979        out_z,
980        &f32_fields,
981        f32_values,
982        &u8_fields,
983        u8_values,
984    )
985}
986
987#[cfg(not(feature = "filter-voxel-gpu"))]
988fn build_voxel_cells_gpu(
989    _x: &[f32],
990    _y: &[f32],
991    _z: &[f32],
992    _origin: Vec3<f32>,
993    _inv_leaf: f32,
994) -> SpatialResult<VoxelCellMap> {
995    Err(SpatialError::InvalidArgument(
996        "GPU voxel downsampling requires the filter-voxel-gpu feature".to_owned(),
997    ))
998}
999
1000fn compute_bounds(x: &[f32], y: &[f32], z: &[f32]) -> (Vec3<f32>, Vec3<f32>) {
1001    let mut min = Vec3::new(x[0], y[0], z[0]);
1002    let mut max = min;
1003    for index in 1..x.len() {
1004        min.x = min.x.min(x[index]);
1005        min.y = min.y.min(y[index]);
1006        min.z = min.z.min(z[index]);
1007        max.x = max.x.max(x[index]);
1008        max.y = max.y.max(y[index]);
1009        max.z = max.z.max(z[index]);
1010    }
1011    (min, max)
1012}
1013
1014fn fits_u32_voxel_key(min: Vec3<f32>, max: Vec3<f32>, inv_leaf: f32) -> bool {
1015    let limit = u32::MAX as f32;
1016    ((max.x - min.x) * inv_leaf) <= limit
1017        && ((max.y - min.y) * inv_leaf) <= limit
1018        && ((max.z - min.z) * inv_leaf) <= limit
1019}
1020
1021fn voxel_key(x: f32, y: f32, z: f32, origin: Vec3<f32>, inv_leaf: f32) -> (i64, i64, i64) {
1022    let ix = ((x - origin.x) * inv_leaf).floor() as i64;
1023    let iy = ((y - origin.y) * inv_leaf).floor() as i64;
1024    let iz = ((z - origin.z) * inv_leaf).floor() as i64;
1025    (ix, iy, iz)
1026}
1027
1028#[inline]
1029fn voxel_key_nonnegative(
1030    x: f32,
1031    y: f32,
1032    z: f32,
1033    origin: Vec3<f32>,
1034    inv_leaf: f32,
1035) -> (i64, i64, i64) {
1036    let ix = ((x - origin.x) * inv_leaf) as i64;
1037    let iy = ((y - origin.y) * inv_leaf) as i64;
1038    let iz = ((z - origin.z) * inv_leaf) as i64;
1039    (ix, iy, iz)
1040}
1041
1042#[inline]
1043fn voxel_key_nonnegative_u32(
1044    x: f32,
1045    y: f32,
1046    z: f32,
1047    origin: Vec3<f32>,
1048    inv_leaf: f32,
1049) -> (u32, u32, u32) {
1050    let ix = ((x - origin.x) * inv_leaf) as u32;
1051    let iy = ((y - origin.y) * inv_leaf) as u32;
1052    let iz = ((z - origin.z) * inv_leaf) as u32;
1053    (ix, iy, iz)
1054}
1055
1056fn append_voxel_point(
1057    input: &PointCloud,
1058    buffers: &mut PointBufferSet,
1059    fields: &[PointField],
1060    cell: &VoxelCell,
1061    mode: VoxelAggregationMode,
1062    attribute_policy: AttributeAggregation,
1063) -> SpatialResult<()> {
1064    let representative = cell.indices[0];
1065    let average_positions = mode == VoxelAggregationMode::Centroid;
1066
1067    for field in fields {
1068        let value = match field.semantic {
1069            FieldSemantic::PositionX | FieldSemantic::PositionY | FieldSemantic::PositionZ => {
1070                if average_positions {
1071                    average_field(input, field, &cell.indices)?
1072                } else {
1073                    read_field_f32(input, field, representative)?
1074                }
1075            }
1076            _ => match (mode, attribute_policy) {
1077                (VoxelAggregationMode::ApproximateFirst, _) => {
1078                    read_field_f32(input, field, representative)?
1079                }
1080                (_, AttributeAggregation::First) => read_field_f32(input, field, representative)?,
1081                (_, AttributeAggregation::Average) => average_field(input, field, &cell.indices)?,
1082            },
1083        };
1084        push_field(buffers, field, value)?;
1085    }
1086    Ok(())
1087}
1088
1089fn average_field(input: &PointCloud, field: &PointField, indices: &[usize]) -> SpatialResult<f32> {
1090    if indices.is_empty() {
1091        return Err(SpatialError::InvalidArgument("cannot average an empty voxel cell".to_owned()));
1092    }
1093    let mut sum = 0.0_f64;
1094    for &index in indices {
1095        sum += f64::from(read_field_f32(input, field, index)?);
1096    }
1097    Ok((sum / indices.len() as f64) as f32)
1098}
1099
1100fn read_field_f32(input: &PointCloud, field: &PointField, index: usize) -> SpatialResult<f32> {
1101    let buffer = input.field(&field.name)?;
1102    match field.dtype {
1103        DType::F32 | DType::F16 => Ok(buffer.as_f32()?[index]),
1104        DType::F64 => {
1105            let PointBuffer::F64(values) = buffer else {
1106                return Err(SpatialError::UnsupportedDType(field.dtype));
1107            };
1108            Ok(values[index] as f32)
1109        }
1110        DType::U8 => {
1111            let PointBuffer::U8(values) = buffer else {
1112                return Err(SpatialError::UnsupportedDType(field.dtype));
1113            };
1114            Ok(f32::from(values[index]))
1115        }
1116        DType::U16 => {
1117            let PointBuffer::U16(values) = buffer else {
1118                return Err(SpatialError::UnsupportedDType(field.dtype));
1119            };
1120            Ok(f32::from(values[index]))
1121        }
1122        DType::I32 => {
1123            let PointBuffer::I32(values) = buffer else {
1124                return Err(SpatialError::UnsupportedDType(field.dtype));
1125            };
1126            Ok(values[index] as f32)
1127        }
1128        DType::U32 => {
1129            let PointBuffer::U32(values) = buffer else {
1130                return Err(SpatialError::UnsupportedDType(field.dtype));
1131            };
1132            Ok(values[index] as f32)
1133        }
1134    }
1135}
1136
1137#[cfg(feature = "filter-voxel-gpu")]
1138fn set_field_from_f32(
1139    buffers: &mut PointBufferSet,
1140    field: &PointField,
1141    values: Vec<f32>,
1142) -> SpatialResult<()> {
1143    let buffer = match field.dtype {
1144        DType::F32 | DType::F16 => PointBuffer::from_f32(values),
1145        DType::F64 => PointBuffer::F64(values.into_iter().map(f64::from).collect()),
1146        DType::U8 => PointBuffer::U8(values.into_iter().map(|value| value.round() as u8).collect()),
1147        DType::U16 => {
1148            PointBuffer::U16(values.into_iter().map(|value| value.round() as u16).collect())
1149        }
1150        DType::I32 => {
1151            PointBuffer::I32(values.into_iter().map(|value| value.round() as i32).collect())
1152        }
1153        DType::U32 => {
1154            PointBuffer::U32(values.into_iter().map(|value| value.round() as u32).collect())
1155        }
1156    };
1157    buffers.insert(field.name.clone(), buffer);
1158    Ok(())
1159}
1160
1161#[cfg(feature = "filter-voxel-gpu")]
1162fn set_field_from_u8(
1163    buffers: &mut PointBufferSet,
1164    field: &PointField,
1165    values: Vec<u8>,
1166) -> SpatialResult<()> {
1167    if field.dtype != DType::U8 {
1168        return Err(SpatialError::UnsupportedDType(field.dtype));
1169    }
1170    buffers.insert(field.name.clone(), PointBuffer::U8(values));
1171    Ok(())
1172}
1173
1174fn push_field(buffers: &mut PointBufferSet, field: &PointField, value: f32) -> SpatialResult<()> {
1175    let buffer = buffers
1176        .get_mut(&field.name)
1177        .ok_or_else(|| SpatialError::MissingField(field.name.clone()))?;
1178    match field.dtype {
1179        DType::F32 | DType::F16 => buffer.push_f32(value),
1180        DType::F64 => buffer.push_f64(f64::from(value)),
1181        DType::U8 => buffer.push_u8(value.round() as u8),
1182        DType::U16 => buffer.push_u16(value.round() as u16),
1183        DType::I32 => buffer.push_i32(value.round() as i32),
1184        DType::U32 => {
1185            let PointBuffer::U32(values) = buffer else {
1186                return Err(SpatialError::UnsupportedDType(field.dtype));
1187            };
1188            values.push(value.round() as u32);
1189            Ok(())
1190        }
1191    }
1192}
1193
1194#[cfg(test)]
1195mod tests {
1196    use super::{VoxelGridDownsample, VoxelGridDownsampleConfig};
1197    use crate::PointCloudFilter;
1198    #[cfg(feature = "filter-voxel-gpu")]
1199    use spatialrust_core::HasNormals3;
1200    use spatialrust_core::{HasIntensity, HasPositions3, PointCloudBuilder, StandardSchemas};
1201    use spatialrust_math::Vec3;
1202
1203    #[test]
1204    fn centroid_downsample_reduces_points() {
1205        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyz());
1206        builder.push_point([0.0, 0.0, 0.0]).unwrap();
1207        builder.push_point([0.1, 0.0, 0.0]).unwrap();
1208        builder.push_point([1.0, 0.0, 0.0]).unwrap();
1209        builder.push_point([1.1, 0.0, 0.0]).unwrap();
1210        let input = builder.build().unwrap();
1211
1212        let filter = VoxelGridDownsample::new(
1213            VoxelGridDownsampleConfig::centroid(0.5).without_gpu_min_points(),
1214        );
1215        let output = filter.filter(&input).unwrap();
1216        assert_eq!(output.len(), 2);
1217
1218        let (x, _, _) = output.positions3().unwrap();
1219        assert!((x[0] - 0.05).abs() < 1e-5);
1220        assert!((x[1] - 1.05).abs() < 1e-5);
1221    }
1222
1223    #[test]
1224    fn explicit_origin_uses_floor_for_negative_voxels() {
1225        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyz());
1226        builder.push_point([-0.6, 0.0, 0.0]).unwrap();
1227        builder.push_point([-0.1, 0.0, 0.0]).unwrap();
1228        builder.push_point([0.1, 0.0, 0.0]).unwrap();
1229        let input = builder.build().unwrap();
1230
1231        let mut config = VoxelGridDownsampleConfig::centroid(1.0).without_gpu_min_points();
1232        config.origin = Some(Vec3::new(0.0, 0.0, 0.0));
1233        let output = VoxelGridDownsample::new(config).filter(&input).unwrap();
1234
1235        let (x, _, _) = output.positions3().unwrap();
1236        assert_eq!(output.len(), 2);
1237        assert!((x[0] - -0.35).abs() < 1e-5);
1238        assert!((x[1] - 0.1).abs() < 1e-5);
1239    }
1240
1241    #[test]
1242    fn default_origin_fast_path_matches_explicit_min_origin() {
1243        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyz());
1244        for point in
1245            [[-1.2, 0.0, 0.0], [-1.1, 0.1, 0.0], [-0.7, 0.0, 0.0], [0.4, 1.0, 0.0], [0.6, 1.0, 0.0]]
1246        {
1247            builder.push_point(point).unwrap();
1248        }
1249        let input = builder.build().unwrap();
1250
1251        let default_output = VoxelGridDownsample::new(VoxelGridDownsampleConfig::centroid(0.5))
1252            .filter(&input)
1253            .unwrap();
1254
1255        let mut config = VoxelGridDownsampleConfig::centroid(0.5);
1256        config.origin = Some(Vec3::new(-1.2, 0.0, 0.0));
1257        let explicit_output = VoxelGridDownsample::new(config).filter(&input).unwrap();
1258
1259        assert_eq!(default_output, explicit_output);
1260    }
1261
1262    #[test]
1263    fn approximate_keeps_first_point_in_voxel() {
1264        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyzi());
1265        builder.push_point([0.0, 0.0, 0.0, 0.2]).unwrap();
1266        builder.push_point([0.1, 0.0, 0.0, 0.9]).unwrap();
1267        let input = builder.build().unwrap();
1268
1269        let filter = VoxelGridDownsample::new(VoxelGridDownsampleConfig::approximate(1.0));
1270        let output = filter.filter(&input).unwrap();
1271        assert_eq!(output.len(), 1);
1272        assert_eq!(output.intensity().unwrap()[0], 0.2);
1273    }
1274
1275    #[test]
1276    fn average_intensity_in_centroid_mode() {
1277        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyzi());
1278        builder.push_point([0.0, 0.0, 0.0, 0.2]).unwrap();
1279        builder.push_point([0.1, 0.0, 0.0, 0.8]).unwrap();
1280        let input = builder.build().unwrap();
1281
1282        let filter = VoxelGridDownsample::new(
1283            VoxelGridDownsampleConfig::centroid(1.0).without_gpu_min_points(),
1284        );
1285        let output = filter.filter(&input).unwrap();
1286        assert_eq!(output.len(), 1);
1287        assert!((output.intensity().unwrap()[0] - 0.5).abs() < 1e-5);
1288    }
1289
1290    #[test]
1291    fn rejects_non_positive_leaf_size() {
1292        let mut builder = PointCloudBuilder::xyz();
1293        builder.push_point([0.0, 0.0, 0.0]).unwrap();
1294        let input = builder.build().unwrap();
1295        let filter = VoxelGridDownsample::new(VoxelGridDownsampleConfig::centroid(0.0));
1296        assert!(filter.filter(&input).is_err());
1297    }
1298
1299    #[cfg(feature = "filter-voxel-gpu")]
1300    #[test]
1301    fn gpu_policy_matches_cpu_downsample() {
1302        use spatialrust_core::ExecutionPolicy;
1303
1304        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyz());
1305        builder.push_point([0.0, 0.0, 0.0]).unwrap();
1306        builder.push_point([0.1, 0.0, 0.0]).unwrap();
1307        builder.push_point([1.0, 0.0, 0.0]).unwrap();
1308        builder.push_point([1.1, 0.0, 0.0]).unwrap();
1309        let input = builder.build().unwrap();
1310
1311        let filter = VoxelGridDownsample::new(
1312            VoxelGridDownsampleConfig::centroid(0.5).without_gpu_min_points(),
1313        );
1314        let cpu = filter.filter(&input).unwrap();
1315        let gpu = filter
1316            .filter_with_policy(&input, ExecutionPolicy::Gpu(spatialrust_core::DeviceKind::Wgpu))
1317            .unwrap();
1318
1319        assert_eq!(cpu.len(), gpu.len());
1320        let (cpu_x, _, _) = cpu.positions3().unwrap();
1321        let (gpu_x, _, _) = gpu.positions3().unwrap();
1322        assert!((cpu_x[0] - gpu_x[0]).abs() < 1e-5);
1323        assert!((cpu_x[1] - gpu_x[1]).abs() < 1e-5);
1324    }
1325
1326    #[cfg(feature = "filter-voxel-gpu")]
1327    #[test]
1328    fn gpu_policy_averages_attributes_on_gpu() {
1329        use spatialrust_core::ExecutionPolicy;
1330
1331        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyzi());
1332        builder.push_point([0.0, 0.0, 0.0, 0.2]).unwrap();
1333        builder.push_point([0.1, 0.0, 0.0, 0.8]).unwrap();
1334        let input = builder.build().unwrap();
1335
1336        let filter = VoxelGridDownsample::new(
1337            VoxelGridDownsampleConfig::centroid(1.0).without_gpu_min_points(),
1338        );
1339        let cpu = filter.filter(&input).unwrap();
1340        let gpu = filter
1341            .filter_with_policy(&input, ExecutionPolicy::Gpu(spatialrust_core::DeviceKind::Wgpu))
1342            .unwrap();
1343
1344        assert_eq!(cpu.len(), gpu.len());
1345        assert!((cpu.intensity().unwrap()[0] - gpu.intensity().unwrap()[0]).abs() < 1e-5);
1346    }
1347
1348    #[cfg(feature = "filter-voxel-gpu")]
1349    #[test]
1350    fn gpu_policy_averages_u8_rgb_on_gpu() {
1351        use spatialrust_core::{ExecutionPolicy, PointBuffer};
1352
1353        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyzrgb());
1354        builder.push_point([0.0, 0.0, 0.0, 10.0, 20.0, 30.0]).unwrap();
1355        builder.push_point([0.1, 0.0, 0.0, 30.0, 40.0, 50.0]).unwrap();
1356        let input = builder.build().unwrap();
1357
1358        let filter = VoxelGridDownsample::new(
1359            VoxelGridDownsampleConfig::centroid(1.0).without_gpu_min_points(),
1360        );
1361        let cpu = filter.filter(&input).unwrap();
1362        let gpu = filter
1363            .filter_with_policy(&input, ExecutionPolicy::Gpu(spatialrust_core::DeviceKind::Wgpu))
1364            .unwrap();
1365
1366        assert_eq!(cpu.len(), gpu.len());
1367        for channel in ["r", "g", "b"] {
1368            let PointBuffer::U8(cpu_values) = cpu.field(channel).unwrap() else {
1369                panic!("expected u8 channel");
1370            };
1371            let PointBuffer::U8(gpu_values) = gpu.field(channel).unwrap() else {
1372                panic!("expected u8 channel");
1373            };
1374            assert_eq!(cpu_values, gpu_values);
1375        }
1376    }
1377
1378    #[cfg(feature = "filter-voxel-gpu")]
1379    #[test]
1380    fn gpu_approximate_first_matches_cpu_downsample() {
1381        use spatialrust_core::ExecutionPolicy;
1382
1383        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyzi());
1384        builder.push_point([0.0, 0.0, 0.0, 0.2]).unwrap();
1385        builder.push_point([0.1, 0.0, 0.0, 0.9]).unwrap();
1386        builder.push_point([1.0, 0.0, 0.0, 10.0]).unwrap();
1387        builder.push_point([1.1, 0.0, 0.0, 20.0]).unwrap();
1388        let input = builder.build().unwrap();
1389
1390        let filter = VoxelGridDownsample::new(
1391            VoxelGridDownsampleConfig::approximate(0.5).without_gpu_min_points(),
1392        );
1393        let cpu = filter.filter(&input).unwrap();
1394        let gpu = filter
1395            .filter_with_policy(&input, ExecutionPolicy::Gpu(spatialrust_core::DeviceKind::Wgpu))
1396            .unwrap();
1397
1398        assert_eq!(cpu.len(), gpu.len());
1399        let (cpu_x, _, _) = cpu.positions3().unwrap();
1400        let (gpu_x, _, _) = gpu.positions3().unwrap();
1401        for index in 0..cpu.len() {
1402            assert!((cpu_x[index] - gpu_x[index]).abs() < 1e-5);
1403        }
1404        assert_eq!(cpu.intensity().unwrap(), gpu.intensity().unwrap());
1405    }
1406
1407    #[cfg(feature = "filter-voxel-gpu")]
1408    #[test]
1409    fn gpu_approximate_first_xyzinormal_matches_cpu_downsample() {
1410        use spatialrust_core::ExecutionPolicy;
1411
1412        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyzinormal());
1413        builder.push_point([0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 1.0]).unwrap();
1414        builder.push_point([0.1, 0.0, 0.0, 0.9, 0.1, 0.0, 1.0]).unwrap();
1415        builder.push_point([1.0, 0.0, 0.0, 10.0, 0.0, 1.0, 0.0]).unwrap();
1416        builder.push_point([1.1, 0.0, 0.0, 20.0, 0.0, 0.0, 1.0]).unwrap();
1417        let input = builder.build().unwrap();
1418
1419        let filter = VoxelGridDownsample::new(
1420            VoxelGridDownsampleConfig::approximate(0.5).without_gpu_min_points(),
1421        );
1422        let cpu = filter.filter(&input).unwrap();
1423        let gpu = filter
1424            .filter_with_policy(&input, ExecutionPolicy::Gpu(spatialrust_core::DeviceKind::Wgpu))
1425            .unwrap();
1426
1427        assert_eq!(cpu.len(), gpu.len());
1428        let (cpu_x, cpu_y, cpu_z) = cpu.positions3().unwrap();
1429        let (gpu_x, gpu_y, gpu_z) = gpu.positions3().unwrap();
1430        for index in 0..cpu.len() {
1431            assert!((cpu_x[index] - gpu_x[index]).abs() < 1e-5);
1432            assert!((cpu_y[index] - gpu_y[index]).abs() < 1e-5);
1433            assert!((cpu_z[index] - gpu_z[index]).abs() < 1e-5);
1434        }
1435        assert_eq!(cpu.intensity().unwrap(), gpu.intensity().unwrap());
1436        let (cpu_nx, cpu_ny, cpu_nz) = cpu.normals3().unwrap();
1437        let (gpu_nx, gpu_ny, gpu_nz) = gpu.normals3().unwrap();
1438        assert_eq!(cpu_nx, gpu_nx);
1439        assert_eq!(cpu_ny, gpu_ny);
1440        assert_eq!(cpu_nz, gpu_nz);
1441    }
1442
1443    #[cfg(feature = "filter-voxel-gpu")]
1444    #[test]
1445    fn gpu_policy_falls_back_to_cpu_below_threshold() {
1446        use spatialrust_core::ExecutionPolicy;
1447
1448        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyz());
1449        builder.push_point([0.0, 0.0, 0.0]).unwrap();
1450        builder.push_point([0.1, 0.0, 0.0]).unwrap();
1451        let input = builder.build().unwrap();
1452
1453        let filter = VoxelGridDownsample::new(VoxelGridDownsampleConfig::centroid(0.5));
1454        let cpu = filter.filter(&input).unwrap();
1455        let gpu = filter
1456            .filter_with_policy(&input, ExecutionPolicy::Gpu(spatialrust_core::DeviceKind::Wgpu))
1457            .unwrap();
1458
1459        assert_eq!(cpu.len(), gpu.len());
1460        let (cpu_x, _, _) = cpu.positions3().unwrap();
1461        let (gpu_x, _, _) = gpu.positions3().unwrap();
1462        assert!((cpu_x[0] - gpu_x[0]).abs() < 1e-5);
1463    }
1464
1465    #[cfg(feature = "filter-voxel-gpu")]
1466    #[test]
1467    fn auto_policy_uses_cpu_for_small_clouds() {
1468        use spatialrust_core::ExecutionPolicy;
1469
1470        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyz());
1471        builder.push_point([0.0, 0.0, 0.0]).unwrap();
1472        builder.push_point([0.1, 0.0, 0.0]).unwrap();
1473        let input = builder.build().unwrap();
1474
1475        let filter = VoxelGridDownsample::new(VoxelGridDownsampleConfig::centroid(0.5));
1476        let cpu = filter.filter(&input).unwrap();
1477        let auto = filter.filter_with_policy(&input, ExecutionPolicy::Auto).unwrap();
1478
1479        assert_eq!(cpu.len(), auto.len());
1480        let (cpu_x, _, _) = cpu.positions3().unwrap();
1481        let (auto_x, _, _) = auto.positions3().unwrap();
1482        assert!((cpu_x[0] - auto_x[0]).abs() < 1e-5);
1483    }
1484
1485    #[cfg(feature = "filter-voxel-gpu")]
1486    #[test]
1487    fn approximate_default_gpu_threshold_is_higher_than_centroid() {
1488        use super::{
1489            VoxelGridDownsampleConfig, DEFAULT_GPU_MIN_POINTS, DEFAULT_GPU_MIN_POINTS_APPROXIMATE,
1490        };
1491
1492        #[allow(clippy::assertions_on_constants)]
1493        {
1494            assert!(DEFAULT_GPU_MIN_POINTS_APPROXIMATE > DEFAULT_GPU_MIN_POINTS);
1495        }
1496
1497        let centroid = VoxelGridDownsampleConfig::centroid(0.5);
1498        let approximate = VoxelGridDownsampleConfig::approximate(0.5);
1499        assert_eq!(centroid.gpu_min_points, Some(DEFAULT_GPU_MIN_POINTS));
1500        assert_eq!(approximate.gpu_min_points, Some(DEFAULT_GPU_MIN_POINTS_APPROXIMATE));
1501    }
1502
1503    #[test]
1504    fn effective_gpu_min_points_blocks_heavy_approximate_schema() {
1505        use super::{
1506            VoxelGridDownsampleConfig, APPROXIMATE_HEAVY_F32_ATTRIBUTE_CHANNELS,
1507            DEFAULT_GPU_MIN_POINTS_APPROXIMATE, DEFAULT_GPU_MIN_POINTS_APPROXIMATE_HEAVY,
1508        };
1509
1510        let approximate = VoxelGridDownsampleConfig::approximate(1.0);
1511        assert_eq!(
1512            approximate.effective_gpu_min_points(&StandardSchemas::point_xyz()),
1513            Some(DEFAULT_GPU_MIN_POINTS_APPROXIMATE)
1514        );
1515        assert_eq!(
1516            approximate.effective_gpu_min_points(&StandardSchemas::point_xyzinormal()),
1517            Some(DEFAULT_GPU_MIN_POINTS_APPROXIMATE_HEAVY)
1518        );
1519        assert!(
1520            super::count_non_position_f32_fields(&StandardSchemas::point_xyzinormal())
1521                >= APPROXIMATE_HEAVY_F32_ATTRIBUTE_CHANNELS
1522        );
1523    }
1524
1525    #[cfg(feature = "filter-voxel-gpu")]
1526    #[test]
1527    fn auto_approximate_first_uses_cpu_for_xyzinormal() {
1528        use spatialrust_core::ExecutionPolicy;
1529
1530        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyzinormal());
1531        for index in 0..128 {
1532            builder
1533                .push_point([
1534                    (index % 16) as f32 * 0.1,
1535                    (index / 16) as f32 * 0.1,
1536                    0.0,
1537                    0.5,
1538                    0.0,
1539                    0.0,
1540                    1.0,
1541                ])
1542                .unwrap();
1543        }
1544        let input = builder.build().unwrap();
1545
1546        let mut config = VoxelGridDownsampleConfig::approximate(0.5);
1547        config.gpu_min_points = Some(10);
1548        let filter = VoxelGridDownsample::new(config);
1549        let cpu = filter.filter(&input).unwrap();
1550        let auto = filter.filter_with_policy(&input, ExecutionPolicy::Auto).unwrap();
1551
1552        assert_eq!(cpu.len(), auto.len());
1553        let (cpu_x, _, _) = cpu.positions3().unwrap();
1554        let (auto_x, _, _) = auto.positions3().unwrap();
1555        for index in 0..cpu.len() {
1556            assert!((cpu_x[index] - auto_x[index]).abs() < 1e-5);
1557        }
1558    }
1559
1560    #[cfg(feature = "filter-voxel-gpu")]
1561    fn synthetic_xyzinormal_plane(point_count: usize) -> spatialrust_core::PointCloud {
1562        let mut builder = PointCloudBuilder::new(StandardSchemas::point_xyzinormal());
1563        for index in 0..point_count {
1564            let x = (index % 256) as f32 * 0.1;
1565            let y = ((index / 256) % 256) as f32 * 0.1;
1566            let intensity = (index % 256) as f32;
1567            builder.push_point([x, y, 0.0, intensity, 0.0, 0.0, 1.0]).unwrap();
1568        }
1569        builder.build().unwrap()
1570    }
1571
1572    #[cfg(feature = "filter-voxel-gpu")]
1573    #[test]
1574    fn auto_approximate_first_uses_cpu_below_heavy_threshold() {
1575        use spatialrust_core::ExecutionPolicy;
1576
1577        const POINT_COUNT: usize = 500_000;
1578        let input = synthetic_xyzinormal_plane(POINT_COUNT);
1579        let filter = VoxelGridDownsample::new(VoxelGridDownsampleConfig::approximate(4.0));
1580        let cpu = filter.filter(&input).unwrap();
1581        let auto = filter.filter_with_policy(&input, ExecutionPolicy::Auto).unwrap();
1582
1583        assert_eq!(cpu.len(), auto.len());
1584        let (cpu_x, cpu_y, cpu_z) = cpu.positions3().unwrap();
1585        let (auto_x, auto_y, auto_z) = auto.positions3().unwrap();
1586        for index in 0..cpu.len() {
1587            assert!((cpu_x[index] - auto_x[index]).abs() < 1e-4);
1588            assert!((cpu_y[index] - auto_y[index]).abs() < 1e-4);
1589            assert!((cpu_z[index] - auto_z[index]).abs() < 1e-4);
1590        }
1591    }
1592
1593    #[cfg(feature = "filter-voxel-gpu")]
1594    #[test]
1595    fn auto_approximate_first_uses_gpu_at_heavy_threshold() {
1596        use spatialrust_core::{DeviceKind, ExecutionPolicy};
1597
1598        const POINT_COUNT: usize = 1_000_000;
1599        let input = synthetic_xyzinormal_plane(POINT_COUNT);
1600        let filter = VoxelGridDownsample::new(VoxelGridDownsampleConfig::approximate(4.0));
1601        let gpu =
1602            filter.filter_with_policy(&input, ExecutionPolicy::Gpu(DeviceKind::Wgpu)).unwrap();
1603        let auto = filter.filter_with_policy(&input, ExecutionPolicy::Auto).unwrap();
1604
1605        assert_eq!(gpu.len(), auto.len());
1606        let (gpu_x, gpu_y, gpu_z) = gpu.positions3().unwrap();
1607        let (auto_x, auto_y, auto_z) = auto.positions3().unwrap();
1608        for index in 0..gpu.len() {
1609            assert!((gpu_x[index] - auto_x[index]).abs() < 1e-4);
1610            assert!((gpu_y[index] - auto_y[index]).abs() < 1e-4);
1611            assert!((gpu_z[index] - auto_z[index]).abs() < 1e-4);
1612        }
1613    }
1614}