From 61f8fe116d8c409668a8d0026a7cb5449047c67c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Fri, 15 Aug 2025 16:05:50 +0200 Subject: [PATCH 1/6] Initial commit almost working more cleanup merge race and lint --- .../ros2msg/sensor_msgs/point_cloud_2.rs | 209 +++++++++++++++++- 1 file changed, 204 insertions(+), 5 deletions(-) diff --git a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs index fc521016fba7..749a7377cc97 100644 --- a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs +++ b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs @@ -1,18 +1,20 @@ use std::io::Cursor; use super::super::definitions::sensor_msgs::{self, PointField, PointFieldDatatype}; +use anyhow::Context as _; use arrow::{ array::{ - BooleanBuilder, FixedSizeListBuilder, ListBuilder, StringBuilder, StructBuilder, - UInt8Builder, UInt32Builder, + ArrayBuilder, BooleanBuilder, FixedSizeListBuilder, Float32Builder, Float64Builder, + Int8Builder, Int16Builder, Int32Builder, ListBuilder, StringBuilder, StructBuilder, + UInt8Builder, UInt16Builder, UInt32Builder, }, datatypes::{DataType, Field, Fields}, }; use byteorder::{BigEndian, LittleEndian, ReadBytesExt as _}; use re_chunk::{Chunk, ChunkComponents, ChunkId}; use re_types::{ - AsComponents as _, Component as _, ComponentDescriptor, SerializedComponentColumn, archetypes, - components, reflection::ComponentDescriptorExt as _, + Archetype as _, AsComponents as _, Component as _, ComponentDescriptor, + SerializedComponentColumn, archetypes, components, reflection::ComponentDescriptorExt as _, }; use std::collections::HashMap; @@ -38,6 +40,8 @@ pub struct PointCloud2MessageParser { data: FixedSizeListBuilder>, is_dense: FixedSizeListBuilder, + points: Vec<(String, ListBuilder>)>, + // We lazily create this, only if we can interpret the point cloud semantically. // For now, this is the case if there are fields with names `x`,`y`, and `z` present. points_3ds: Option>, @@ -80,11 +84,27 @@ impl Ros2MessageParser for PointCloud2MessageParser { data: blob_list_builder(num_rows), is_dense: fixed_size_list_builder(1, num_rows), + points: Default::default(), + points_3ds: None, } } } +fn builder_from_datatype(datatype: PointFieldDatatype) -> Box { + match datatype { + PointFieldDatatype::Unknown => unreachable!(), + PointFieldDatatype::Int8 => Box::new(Int8Builder::new()), + PointFieldDatatype::UInt8 => Box::new(UInt8Builder::new()), + PointFieldDatatype::Int16 => Box::new(Int16Builder::new()), + PointFieldDatatype::UInt16 => Box::new(UInt16Builder::new()), + PointFieldDatatype::Int32 => Box::new(Int32Builder::new()), + PointFieldDatatype::UInt32 => Box::new(UInt32Builder::new()), + PointFieldDatatype::Float32 => Box::new(Float32Builder::new()), + PointFieldDatatype::Float64 => Box::new(Float64Builder::new()), + } +} + fn access(data: &[u8], datatype: PointFieldDatatype, is_big_endian: bool) -> std::io::Result { let mut rdr = Cursor::new(data); match (is_big_endian, datatype) { @@ -106,6 +126,22 @@ fn access(data: &[u8], datatype: PointFieldDatatype, is_big_endian: bool) -> std } } +impl From for DataType { + fn from(value: PointFieldDatatype) -> Self { + match value { + PointFieldDatatype::Unknown => unreachable!(), // Not part of the MCAP spec + PointFieldDatatype::Int8 => Self::Int8, + PointFieldDatatype::UInt8 => Self::UInt8, + PointFieldDatatype::Int16 => Self::Int16, + PointFieldDatatype::UInt16 => Self::UInt16, + PointFieldDatatype::Int32 => Self::Int32, + PointFieldDatatype::UInt32 => Self::UInt32, + PointFieldDatatype::Float32 => Self::Float32, + PointFieldDatatype::Float64 => Self::Float64, + } + } +} + pub struct Position3DIter<'a> { point_iter: std::slice::ChunksExact<'a, u8>, is_big_endian: bool, @@ -172,6 +208,128 @@ impl Iterator for Position3DIter<'_> { } } +fn add_field_value( + builder: &mut Box, + field: &PointField, + is_big_endian: bool, + data: &[u8], +) -> anyhow::Result<()> { + let mut rdr = Cursor::new(data); + match field.datatype { + PointFieldDatatype::Unknown => unreachable!(), + PointFieldDatatype::Int8 => { + let builder = builder + .as_any_mut() + .downcast_mut::() + .with_context(|| { + format!("found datatype {:?}, but `Int8Builder`", field.datatype) + })?; + let val = rdr.read_i8()?; + builder.append_value(val); + } + PointFieldDatatype::UInt8 => { + let builder = builder + .as_any_mut() + .downcast_mut::() + .with_context(|| { + format!("found datatype {:?}, but `UInt8Builder`", field.datatype) + })?; + let val = rdr.read_u8()?; + builder.append_value(val); + } + PointFieldDatatype::Int16 => { + let builder = builder + .as_any_mut() + .downcast_mut::() + .with_context(|| { + format!("found datatype {:?}, but `Int16Builder`", field.datatype) + })?; + let val = if is_big_endian { + rdr.read_i16::()? + } else { + rdr.read_i16::()? + }; + builder.append_value(val); + } + PointFieldDatatype::UInt16 => { + let builder = builder + .as_any_mut() + .downcast_mut::() + .with_context(|| { + format!("found datatype {:?}, but `UInt16Builder`", field.datatype) + })?; + let val = if is_big_endian { + rdr.read_u16::()? + } else { + rdr.read_u16::()? + }; + builder.append_value(val); + } + + PointFieldDatatype::Int32 => { + let builder = builder + .as_any_mut() + .downcast_mut::() + .with_context(|| { + format!("found datatype {:?}, but `Int32Builder`", field.datatype) + })?; + + let val = if is_big_endian { + rdr.read_i32::()? + } else { + rdr.read_i32::()? + }; + builder.append_value(val); + } + PointFieldDatatype::UInt32 => { + let builder = builder + .as_any_mut() + .downcast_mut::() + .with_context(|| { + format!("found datatype {:?}, but `UInt16Builder`", field.datatype) + })?; + let val = if is_big_endian { + rdr.read_u32::()? + } else { + rdr.read_u32::()? + }; + builder.append_value(val); + } + + PointFieldDatatype::Float32 => { + let builder = builder + .as_any_mut() + .downcast_mut::() + .with_context(|| { + format!("found datatype {:?}, but `Float32Builder`", field.datatype) + })?; + let val = if is_big_endian { + rdr.read_f32::()? + } else { + rdr.read_f32::()? + }; + builder.append_value(val); + } + + PointFieldDatatype::Float64 => { + let builder = builder + .as_any_mut() + .downcast_mut::() + .with_context(|| { + format!("found datatype {:?}, but `Float64Builder`", field.datatype) + })?; + let val = if is_big_endian { + rdr.read_f64::()? + } else { + rdr.read_f64::()? + }; + builder.append_value(val); + } + } + + Ok(()) +} + impl MessageParser for PointCloud2MessageParser { fn append(&mut self, ctx: &mut ParserContext, msg: &mcap::Message<'_>) -> anyhow::Result<()> { let point_cloud = cdr::try_decode_message::(msg.data.as_ref()) @@ -194,6 +352,8 @@ impl MessageParser for PointCloud2MessageParser { data, is_dense, + points, + points_3ds, } = self; @@ -207,6 +367,31 @@ impl MessageParser for PointCloud2MessageParser { &point_cloud.fields, ); + // We lazily initialize the builders for the values in the point cloud. + if points.len() != point_cloud.fields.len() { + *points = point_cloud + .fields + .iter() + .map(|field| { + ( + field.name.clone(), + ListBuilder::new(builder_from_datatype(field.datatype)), + ) + }) + .collect(); + } + + for point in point_cloud.data.chunks(point_cloud.point_step as usize) { + for (field, (_name, builder)) in point_cloud.fields.iter().zip(points.iter_mut()) { + let field_builder = builder.values(); + add_field_value(field_builder, field, point_cloud.is_bigendian, point)?; + } + } + + for (_name, builder) in points { + builder.append(true); + } + if let Some(position_iter) = position_iter { points_3ds .get_or_insert_with(|| Vec::with_capacity(*num_rows)) @@ -290,12 +475,14 @@ impl MessageParser for PointCloud2MessageParser { mut data, mut is_dense, + points, + points_3ds, } = *self; let mut chunks = Vec::new(); - for (i, points_3d) in points_3ds.into_iter().enumerate() { + for (i, points_3d) in points_3ds.iter().enumerate() { let timelines = timelines .iter() .map(|(timeline, time_col)| (*timeline, time_col.row_sliced(i, 1).clone())) @@ -365,6 +552,18 @@ impl MessageParser for PointCloud2MessageParser { ), ] .into_iter() + .chain(points.into_iter().filter_map(|(name, mut builder)| { + points_3ds.as_ref()?; + if ["x", "y", "z"].contains(&name.as_str()) { + None + } else { + Some(( + ComponentDescriptor::partial(name.clone()) + .with_builtin_archetype(archetypes::Points3D::name()), + builder.finish(), + )) + } + })) .collect(), )?; From 74eb79d2369fcb8193fb605d1ab99446fcbc33a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Fri, 12 Sep 2025 14:39:31 +0200 Subject: [PATCH 2/6] Update crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs Co-authored-by: Gijs de Jong <14833076+oxkitsune@users.noreply.github.com> --- .../re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs index 749a7377cc97..77988d4af86d 100644 --- a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs +++ b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs @@ -129,7 +129,7 @@ fn access(data: &[u8], datatype: PointFieldDatatype, is_big_endian: bool) -> std impl From for DataType { fn from(value: PointFieldDatatype) -> Self { match value { - PointFieldDatatype::Unknown => unreachable!(), // Not part of the MCAP spec + PointFieldDatatype::Unknown => unreachable!(), // Not part of the ROS2 spec PointFieldDatatype::Int8 => Self::Int8, PointFieldDatatype::UInt8 => Self::UInt8, PointFieldDatatype::Int16 => Self::Int16, From 961423d960cefde15c7d5bd4d0707cca94f4ef5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Fri, 12 Sep 2025 14:38:59 +0200 Subject: [PATCH 3/6] feedback --- .../ros2msg/definitions/sensor_msgs.rs | 1 + .../ros2msg/sensor_msgs/point_cloud_2.rs | 31 +++++++++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/crates/utils/re_mcap/src/parsers/ros2msg/definitions/sensor_msgs.rs b/crates/utils/re_mcap/src/parsers/ros2msg/definitions/sensor_msgs.rs index 2e7ab119b847..f8f12c7975a3 100644 --- a/crates/utils/re_mcap/src/parsers/ros2msg/definitions/sensor_msgs.rs +++ b/crates/utils/re_mcap/src/parsers/ros2msg/definitions/sensor_msgs.rs @@ -114,6 +114,7 @@ pub struct Imu { #[repr(u8)] pub enum PointFieldDatatype { /// Does not exist in original spec. + /// Unknown = 0, Int8 = 1, UInt8 = 2, diff --git a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs index 77988d4af86d..50d03594e19b 100644 --- a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs +++ b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs @@ -40,7 +40,7 @@ pub struct PointCloud2MessageParser { data: FixedSizeListBuilder>, is_dense: FixedSizeListBuilder, - points: Vec<(String, ListBuilder>)>, + extracted_fields: Vec<(String, ListBuilder>)>, // We lazily create this, only if we can interpret the point cloud semantically. // For now, this is the case if there are fields with names `x`,`y`, and `z` present. @@ -84,7 +84,7 @@ impl Ros2MessageParser for PointCloud2MessageParser { data: blob_list_builder(num_rows), is_dense: fixed_size_list_builder(1, num_rows), - points: Default::default(), + extracted_fields: Default::default(), points_3ds: None, } @@ -129,7 +129,9 @@ fn access(data: &[u8], datatype: PointFieldDatatype, is_big_endian: bool) -> std impl From for DataType { fn from(value: PointFieldDatatype) -> Self { match value { - PointFieldDatatype::Unknown => unreachable!(), // Not part of the ROS2 spec + // Not part of the ROS2 spec + // https://docs.ros.org/en/noetic/api/sensor_msgs/html/msg/PointField.html + PointFieldDatatype::Unknown => unreachable!(), PointFieldDatatype::Int8 => Self::Int8, PointFieldDatatype::UInt8 => Self::UInt8, PointFieldDatatype::Int16 => Self::Int16, @@ -352,7 +354,7 @@ impl MessageParser for PointCloud2MessageParser { data, is_dense, - points, + extracted_fields, points_3ds, } = self; @@ -367,9 +369,10 @@ impl MessageParser for PointCloud2MessageParser { &point_cloud.fields, ); - // We lazily initialize the builders for the values in the point cloud. - if points.len() != point_cloud.fields.len() { - *points = point_cloud + // We lazily initialize the builders that store the extracted fields from + // the blob when we receive the first message. + if extracted_fields.len() != point_cloud.fields.len() { + *extracted_fields = point_cloud .fields .iter() .map(|field| { @@ -382,13 +385,15 @@ impl MessageParser for PointCloud2MessageParser { } for point in point_cloud.data.chunks(point_cloud.point_step as usize) { - for (field, (_name, builder)) in point_cloud.fields.iter().zip(points.iter_mut()) { + for (field, (_name, builder)) in + point_cloud.fields.iter().zip(extracted_fields.iter_mut()) + { let field_builder = builder.values(); add_field_value(field_builder, field, point_cloud.is_bigendian, point)?; } } - for (_name, builder) in points { + for (_name, builder) in extracted_fields { builder.append(true); } @@ -475,7 +480,7 @@ impl MessageParser for PointCloud2MessageParser { mut data, mut is_dense, - points, + extracted_fields: points, points_3ds, } = *self; @@ -553,6 +558,12 @@ impl MessageParser for PointCloud2MessageParser { ] .into_iter() .chain(points.into_iter().filter_map(|(name, mut builder)| { + // We only extract additional fields when we have a `Points3d` + // archetype to attach them to. In that case we're not interested + // in the other components. + // TODO(@grtlr): It would be nice to never initialize the unnecessary builders + // in the first place. But, we'll soon move the semantic extraction of `Points3d` + // into a different layer anyways, making that optimization obsolete. points_3ds.as_ref()?; if ["x", "y", "z"].contains(&name.as_str()) { None From 477f8f9ac062ba7dc61263ddf27a4b852580f2a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Fri, 12 Sep 2025 14:43:08 +0200 Subject: [PATCH 4/6] lint --- .../src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs index 50d03594e19b..09cae4c71662 100644 --- a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs +++ b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs @@ -129,8 +129,8 @@ fn access(data: &[u8], datatype: PointFieldDatatype, is_big_endian: bool) -> std impl From for DataType { fn from(value: PointFieldDatatype) -> Self { match value { - // Not part of the ROS2 spec - // https://docs.ros.org/en/noetic/api/sensor_msgs/html/msg/PointField.html + // Not part of the MCAP spec + // // https://docs.ros.org/en/noetic/api/sensor_msgs/html/msg/PointField.html PointFieldDatatype::Unknown => unreachable!(), PointFieldDatatype::Int8 => Self::Int8, PointFieldDatatype::UInt8 => Self::UInt8, @@ -561,7 +561,7 @@ impl MessageParser for PointCloud2MessageParser { // We only extract additional fields when we have a `Points3d` // archetype to attach them to. In that case we're not interested // in the other components. - // TODO(@grtlr): It would be nice to never initialize the unnecessary builders + // TODO(grtlr): It would be nice to never initialize the unnecessary builders // in the first place. But, we'll soon move the semantic extraction of `Points3d` // into a different layer anyways, making that optimization obsolete. points_3ds.as_ref()?; From ee40d84a62d2be919d3d6840f34dd7ec7033b344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Mon, 15 Sep 2025 11:29:09 +0200 Subject: [PATCH 5/6] Improve error handling around `PointFieldDatatype` --- .../ros2msg/definitions/sensor_msgs.rs | 21 +++++++++++-------- .../ros2msg/sensor_msgs/point_cloud_2.rs | 6 ------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/crates/utils/re_mcap/src/parsers/ros2msg/definitions/sensor_msgs.rs b/crates/utils/re_mcap/src/parsers/ros2msg/definitions/sensor_msgs.rs index f8f12c7975a3..387883746877 100644 --- a/crates/utils/re_mcap/src/parsers/ros2msg/definitions/sensor_msgs.rs +++ b/crates/utils/re_mcap/src/parsers/ros2msg/definitions/sensor_msgs.rs @@ -110,12 +110,9 @@ pub struct Imu { } #[derive(Copy, Clone, Debug, Serialize, Deserialize)] -#[serde(from = "u8", into = "u8")] +#[serde(try_from = "u8", into = "u8")] #[repr(u8)] pub enum PointFieldDatatype { - /// Does not exist in original spec. - /// - Unknown = 0, Int8 = 1, UInt8 = 2, Int16 = 3, @@ -126,9 +123,15 @@ pub enum PointFieldDatatype { Float64 = 8, } -impl From for PointFieldDatatype { - fn from(value: u8) -> Self { - match value { +#[derive(Debug, thiserror::Error)] +#[error("unknown point field datatype: {0}")] +pub struct UnknownPointFieldDatatype(u8); + +impl TryFrom for PointFieldDatatype { + type Error = UnknownPointFieldDatatype; + + fn try_from(value: u8) -> Result { + Ok(match value { 1 => Self::Int8, 2 => Self::UInt8, 3 => Self::Int16, @@ -137,8 +140,8 @@ impl From for PointFieldDatatype { 6 => Self::UInt32, 7 => Self::Float32, 8 => Self::Float64, - _ => Self::Unknown, - } + other => Err(UnknownPointFieldDatatype(other))?, + }) } } diff --git a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs index 09cae4c71662..9fd39df4a838 100644 --- a/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs +++ b/crates/utils/re_mcap/src/parsers/ros2msg/sensor_msgs/point_cloud_2.rs @@ -93,7 +93,6 @@ impl Ros2MessageParser for PointCloud2MessageParser { fn builder_from_datatype(datatype: PointFieldDatatype) -> Box { match datatype { - PointFieldDatatype::Unknown => unreachable!(), PointFieldDatatype::Int8 => Box::new(Int8Builder::new()), PointFieldDatatype::UInt8 => Box::new(UInt8Builder::new()), PointFieldDatatype::Int16 => Box::new(Int16Builder::new()), @@ -108,7 +107,6 @@ fn builder_from_datatype(datatype: PointFieldDatatype) -> Box fn access(data: &[u8], datatype: PointFieldDatatype, is_big_endian: bool) -> std::io::Result { let mut rdr = Cursor::new(data); match (is_big_endian, datatype) { - (_, PointFieldDatatype::Unknown) => Ok(0f32), // Not in the original spec. (_, PointFieldDatatype::UInt8) => rdr.read_u8().map(|x| x as f32), (_, PointFieldDatatype::Int8) => rdr.read_i8().map(|x| x as f32), (true, PointFieldDatatype::Int16) => rdr.read_i16::().map(|x| x as f32), @@ -129,9 +127,6 @@ fn access(data: &[u8], datatype: PointFieldDatatype, is_big_endian: bool) -> std impl From for DataType { fn from(value: PointFieldDatatype) -> Self { match value { - // Not part of the MCAP spec - // // https://docs.ros.org/en/noetic/api/sensor_msgs/html/msg/PointField.html - PointFieldDatatype::Unknown => unreachable!(), PointFieldDatatype::Int8 => Self::Int8, PointFieldDatatype::UInt8 => Self::UInt8, PointFieldDatatype::Int16 => Self::Int16, @@ -218,7 +213,6 @@ fn add_field_value( ) -> anyhow::Result<()> { let mut rdr = Cursor::new(data); match field.datatype { - PointFieldDatatype::Unknown => unreachable!(), PointFieldDatatype::Int8 => { let builder = builder .as_any_mut() From 09a115b1f4e15030f76c40f821c2ac04403bea7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Mon, 15 Sep 2025 15:41:28 +0200 Subject: [PATCH 6/6] update snapshot --- ...s__assert_chunk_snapshot@supported_ros2_messages.mcap.snap | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/store/re_data_loader/tests/snapshots/test_mcap_loader__tests__assert_chunk_snapshot@supported_ros2_messages.mcap.snap b/crates/store/re_data_loader/tests/snapshots/test_mcap_loader__tests__assert_chunk_snapshot@supported_ros2_messages.mcap.snap index d433c9bd7e64..74e05bce7003 100644 --- a/crates/store/re_data_loader/tests/snapshots/test_mcap_loader__tests__assert_chunk_snapshot@supported_ros2_messages.mcap.snap +++ b/crates/store/re_data_loader/tests/snapshots/test_mcap_loader__tests__assert_chunk_snapshot@supported_ros2_messages.mcap.snap @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fe309bc865899cacf618eb9a2c38ba16aa457bce7032b44defb44ef3161b1ed -size 441989 +oid sha256:a5007101f4ae92b50171c12aed1b51ceb482503aade827248c8d8c8fabb146f4 +size 442998