diff --git a/Cargo.lock b/Cargo.lock index fd66ecc5670..57be369d7c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9867,6 +9867,7 @@ dependencies = [ "geo-types", "geoarrow", "prost 0.14.4", + "rstest", "vortex-array", "vortex-error", "vortex-session", diff --git a/vortex-geo/Cargo.toml b/vortex-geo/Cargo.toml index 4e76e2eefcb..3b09cb77cf8 100644 --- a/vortex-geo/Cargo.toml +++ b/vortex-geo/Cargo.toml @@ -26,6 +26,7 @@ wkb = { workspace = true } [dev-dependencies] geo-traits = { workspace = true } geo-types = { workspace = true } +rstest = { workspace = true } [lints] workspace = true diff --git a/vortex-geo/src/extension/coordinate.rs b/vortex-geo/src/extension/coordinate.rs index 5624886dbb5..3e560bc4734 100644 --- a/vortex-geo/src/extension/coordinate.rs +++ b/vortex-geo/src/extension/coordinate.rs @@ -14,6 +14,7 @@ use std::fmt::Display; use std::fmt::Formatter; +use geoarrow::datatypes::Dimension as GeoArrowDimension; use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; use vortex_array::arrays::ExtensionArray; @@ -25,6 +26,7 @@ use vortex_array::dtype::DType; use vortex_array::dtype::FieldNames; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; +use vortex_array::dtype::StructFields; use vortex_array::scalar::Scalar; use vortex_error::VortexResult; use vortex_error::vortex_bail; @@ -63,6 +65,38 @@ impl Dimension { _ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"), }) } + + /// The coordinate field names of this dimension, in GeoArrow order. + pub(crate) fn field_names(self) -> &'static [&'static str] { + match self { + Dimension::Xy => &["x", "y"], + Dimension::Xyz => &["x", "y", "z"], + Dimension::Xym => &["x", "y", "m"], + Dimension::Xyzm => &["x", "y", "z", "m"], + } + } +} + +impl From for Dimension { + fn from(dim: GeoArrowDimension) -> Self { + match dim { + GeoArrowDimension::XY => Dimension::Xy, + GeoArrowDimension::XYZ => Dimension::Xyz, + GeoArrowDimension::XYM => Dimension::Xym, + GeoArrowDimension::XYZM => Dimension::Xyzm, + } + } +} + +impl From for GeoArrowDimension { + fn from(dim: Dimension) -> Self { + match dim { + Dimension::Xy => GeoArrowDimension::XY, + Dimension::Xyz => GeoArrowDimension::XYZ, + Dimension::Xym => GeoArrowDimension::XYM, + Dimension::Xyzm => GeoArrowDimension::XYZM, + } + } } /// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them. @@ -122,6 +156,21 @@ pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult { Dimension::from_field_names(fields.names()) } +/// The canonical storage dtype for `dim`: a `Struct` of non-nullable `f64` coordinate fields, +/// with `nullability` at the struct (per-point) level. Inverse of [`coordinate_dimension`]. +pub(crate) fn coordinate_storage_dtype(dim: Dimension, nullability: Nullability) -> DType { + let names = dim.field_names(); + let fields = std::iter::repeat_n( + DType::Primitive(PType::F64, Nullability::NonNullable), + names.len(), + ) + .collect::>(); + DType::Struct( + StructFields::new(FieldNames::from(names), fields), + nullability, + ) +} + /// Decode a [`Coordinate`] from a coordinate `Struct` scalar (`z`/`m` read iff /// present, so the same decoder serves every dimension). pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult { @@ -196,12 +245,14 @@ pub(crate) fn parse_storage( #[cfg(test)] mod tests { + use rstest::rstest; use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::StructArray; use vortex_array::dtype::FieldNames; + use vortex_array::dtype::Nullability; use vortex_array::dtype::extension::ExtDType; use vortex_array::session::ArraySession; use vortex_array::validity::Validity; @@ -209,26 +260,43 @@ mod tests { use vortex_session::VortexSession; use super::Coordinate; + use super::Dimension; + use super::coordinate_dimension; + use super::coordinate_storage_dtype; use super::parse_storage; use crate::extension::GeoMetadata; use crate::extension::Point; + /// Each dimension round-trips through its field names and canonical storage dtype. + #[rstest] + #[case::xy(Dimension::Xy, &["x", "y"])] + #[case::xyz(Dimension::Xyz, &["x", "y", "z"])] + #[case::xym(Dimension::Xym, &["x", "y", "m"])] + #[case::xyzm(Dimension::Xyzm, &["x", "y", "z", "m"])] + fn storage_dtype_roundtrips_dimension( + #[case] dim: Dimension, + #[case] names: &[&str], + ) -> VortexResult<()> { + assert_eq!(dim.field_names(), names); + let dtype = coordinate_storage_dtype(dim, Nullability::NonNullable); + assert_eq!(coordinate_dimension(&dtype)?, dim); + Ok(()) + } + /// Display emits WKT, including `z`/`m` when present. - #[test] - fn display_is_wkt() { - let coordinate = |z, m| Coordinate { + #[rstest] + #[case::xy(None, None, "POINT(1 2)")] + #[case::xyz(Some(3.0), None, "POINT Z (1 2 3)")] + #[case::xym(None, Some(4.0), "POINT M (1 2 4)")] + #[case::xyzm(Some(3.0), Some(4.0), "POINT ZM (1 2 3 4)")] + fn display_is_wkt(#[case] z: Option, #[case] m: Option, #[case] expected: &str) { + let coordinate = Coordinate { x: 1.0, y: 2.0, z, m, }; - assert_eq!(coordinate(None, None).to_string(), "POINT(1 2)"); - assert_eq!(coordinate(Some(3.0), None).to_string(), "POINT Z (1 2 3)"); - assert_eq!(coordinate(None, Some(4.0)).to_string(), "POINT M (1 2 4)"); - assert_eq!( - coordinate(Some(3.0), Some(4.0)).to_string(), - "POINT ZM (1 2 3 4)" - ); + assert_eq!(coordinate.to_string(), expected); } /// [`parse_storage`] reads the coordinate fields unmasked, so a nullable point column must diff --git a/vortex-geo/src/extension/mod.rs b/vortex-geo/src/extension/mod.rs index d69dd239d14..cc3a1f6e532 100644 --- a/vortex-geo/src/extension/mod.rs +++ b/vortex-geo/src/extension/mod.rs @@ -6,7 +6,10 @@ mod point; mod wkb; use std::fmt::Display; +use std::sync::Arc; +use geoarrow::datatypes::Crs; +use geoarrow::datatypes::Metadata; pub use point::*; pub use wkb::*; @@ -30,6 +33,32 @@ impl Display for GeoMetadata { } } +/// The GeoArrow [`Metadata`] equivalent of `geo_metadata`. +pub(crate) fn geoarrow_metadata(geo_metadata: &GeoMetadata) -> Arc { + Arc::new(Metadata::new( + geo_metadata + .crs + .as_ref() + .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) + .unwrap_or_default(), + None, + )) +} + +/// Recover [`GeoMetadata`] from GeoArrow metadata. +pub(crate) fn geo_metadata_from_arrow(metadata: &Metadata) -> GeoMetadata { + let crs = metadata.crs().crs_value().map(|value| { + // `Crs::from_unknown_crs_type` stores the user's string verbatim as a JSON string + // value, so prefer the raw string when available to round-trip cleanly. For other + // CRS encodings (PROJJSON object, etc.), fall back to the JSON-encoded form. + value + .as_str() + .map(str::to_string) + .unwrap_or_else(|| value.to_string()) + }); + GeoMetadata { crs } +} + #[cfg(test)] mod tests { use prost::Message; diff --git a/vortex-geo/src/extension/point.rs b/vortex-geo/src/extension/point.rs index 119de3a7e45..ecd544ca51e 100644 --- a/vortex-geo/src/extension/point.rs +++ b/vortex-geo/src/extension/point.rs @@ -6,18 +6,47 @@ //! XYZM — tagged with [`GeoMetadata`] (CRS). `z` is an optional elevation and `m` an optional //! measure: an arbitrary per-point value such as distance along a route or a timestamp. +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::extension::ExtensionType; +use geoarrow::array::IntoArrow; +use geoarrow::array::PointArray; +use geoarrow::datatypes::CoordType; +use geoarrow::datatypes::PointType; use prost::Message; +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::ExtensionArray; +use vortex_array::arrays::extension::ExtensionArrayExt; +use vortex_array::arrow::ArrowExport; +use vortex_array::arrow::ArrowExportVTable; +use vortex_array::arrow::ArrowImport; +use vortex_array::arrow::ArrowImportVTable; +use vortex_array::arrow::ArrowSession; +use vortex_array::arrow::ArrowSessionExt; +use vortex_array::arrow::FromArrowArray; +use vortex_array::dtype::DType; use vortex_array::dtype::extension::ExtDType; use vortex_array::dtype::extension::ExtId; use vortex_array::dtype::extension::ExtVTable; use vortex_array::scalar::Scalar; use vortex_array::scalar::ScalarValue; use vortex_error::VortexResult; +use vortex_error::vortex_ensure; +use vortex_error::vortex_err; +use vortex_session::registry::CachedId; +use vortex_session::registry::Id; use super::GeoMetadata; use super::coordinate::Coordinate; +use super::coordinate::Dimension; use super::coordinate::coordinate_dimension; use super::coordinate::coordinate_from_struct; +use super::coordinate::coordinate_storage_dtype; +use super::geo_metadata_from_arrow; +use super::geoarrow_metadata; /// A single location: `geoarrow.point`, stored as `Struct` of non-nullable `f64`. #[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] @@ -55,18 +84,143 @@ impl ExtVTable for Point { } } +static ARROW_POINT: CachedId = CachedId::new(PointType::NAME); + +/// The `geoarrow.point` extension type for `dimension`, with separated (struct) coordinates +/// matching `Point` storage. +fn point_type(geo_metadata: &GeoMetadata, dimension: Dimension) -> PointType { + PointType::new(dimension.into(), geoarrow_metadata(geo_metadata)) +} + +impl ArrowExportVTable for Point { + fn arrow_ext_id(&self) -> Id { + *ARROW_POINT + } + + fn vortex_id(&self) -> Id { + self.id() + } + + fn to_arrow_field( + &self, + name: &str, + dtype: &DType, + session: &ArrowSession, + ) -> VortexResult> { + let ext_type = dtype.as_extension(); + let geo_metadata = ext_type.metadata::(); + let dimension = coordinate_dimension(ext_type.storage_dtype())?; + + let mut field = session.to_arrow_field(name, ext_type.storage_dtype())?; + field.try_with_extension_type(point_type(geo_metadata, dimension))?; + + Ok(Some(field)) + } + + fn execute_arrow( + &self, + array: ArrayRef, + target: &Field, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + let is_point = array + .dtype() + .as_extension_opt() + .map(|ext| ext.is::()) + .unwrap_or(false); + if !is_point { + return Ok(ArrowExport::Unsupported(array)); + } + + let Ok(point_meta) = target.try_extension_type::() else { + return Ok(ArrowExport::Unsupported(array)); + }; + if point_meta.coord_type() != CoordType::Separated { + return Ok(ArrowExport::Unsupported(array)); + } + + let executed = array.execute::(ctx)?; + let storage = executed.storage_array().clone(); + + let storage_field = Field::new( + String::new(), + target.data_type().clone(), + target.is_nullable(), + ); + let session = ctx.session().clone(); + let arrow_storage = session + .arrow() + .execute_arrow(storage, Some(&storage_field), ctx)?; + + // Round-trip through the GeoArrow point array type: this validates that the storage is + // the separated-coordinate struct layout expected for a `PointType` extension field. + let points = PointArray::try_from((arrow_storage.as_ref(), point_meta)) + .map_err(|e| vortex_err!("failed to construct PointArray: {e}"))?; + + Ok(ArrowExport::Exported(points.into_arrow())) + } +} + +impl ArrowImportVTable for Point { + fn arrow_ext_id(&self) -> Id { + *ARROW_POINT + } + + fn from_arrow_field(&self, field: &Field) -> VortexResult> { + let Ok(point_meta) = field.try_extension_type::() else { + return Ok(None); + }; + vortex_ensure!( + point_meta.coord_type() == CoordType::Separated, + "geoarrow.point with interleaved coordinates is not supported; \ + re-encode with separated (struct) coordinates" + ); + + let storage_dtype = + coordinate_storage_dtype(point_meta.dimension().into(), field.is_nullable().into()); + Ok(Some(DType::Extension( + ExtDType::try_with_vtable( + Point, + geo_metadata_from_arrow(point_meta.metadata()), + storage_dtype, + )? + .erased(), + ))) + } + + fn from_arrow_array( + &self, + array: ArrowArrayRef, + field: &Field, + dtype: &DType, + ) -> VortexResult { + let Some(ext_dtype) = dtype.as_extension_opt() else { + return Ok(ArrowImport::Unsupported(array)); + }; + if !ext_dtype.is::() + || field.try_extension_type::().is_err() + || !matches!(array.data_type(), DataType::Struct(_)) + { + return Ok(ArrowImport::Unsupported(array)); + } + + let storage = ArrayRef::from_arrow(array.as_ref(), field.is_nullable())?; + Ok(ArrowImport::Imported( + ExtensionArray::try_new(ext_dtype.clone(), storage)?.into_array(), + )) + } +} + #[cfg(test)] mod tests { + use rstest::rstest; use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; - use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::StructArray; use vortex_array::dtype::DType; - use vortex_array::dtype::FieldNames; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; - use vortex_array::dtype::StructFields; use vortex_array::dtype::extension::ExtDType; use vortex_array::session::ArraySession; use vortex_error::VortexResult; @@ -76,8 +230,9 @@ mod tests { use crate::extension::GeoMetadata; use crate::extension::coordinate::Coordinate; use crate::extension::coordinate::Dimension; - use crate::extension::coordinate::coordinate_dimension; use crate::extension::coordinate::coordinate_from_scalar; + use crate::extension::coordinate::coordinate_storage_dtype; + use crate::test_harness::point_column; fn geo_meta() -> GeoMetadata { GeoMetadata { @@ -85,34 +240,15 @@ mod tests { } } - /// A coordinate storage dtype with the given field names, non-nullable `f64` per field. - fn coordinate_dtype(names: &[&'static str]) -> DType { - let fields = std::iter::repeat_n( - DType::Primitive(PType::F64, Nullability::NonNullable), - names.len(), - ) - .collect::>(); - DType::Struct( - StructFields::new(FieldNames::from(names), fields), - Nullability::NonNullable, - ) - } - - /// `Point` accepts every GeoArrow dimension; the canonical field names round-trip to their - /// dimension, so a `z`/`m` swap or a mislabel would be caught. - #[test] - fn point_validates_every_dimension() -> VortexResult<()> { - let cases = [ - (Dimension::Xy, ["x", "y"].as_slice()), - (Dimension::Xyz, ["x", "y", "z"].as_slice()), - (Dimension::Xym, ["x", "y", "m"].as_slice()), - (Dimension::Xyzm, ["x", "y", "z", "m"].as_slice()), - ]; - for (dim, names) in cases { - let storage = coordinate_dtype(names); - assert_eq!(coordinate_dimension(&storage)?, dim); - ExtDType::::try_new(geo_meta(), storage)?; - } + /// `Point` accepts the canonical coordinate storage of every GeoArrow dimension. + #[rstest] + #[case::xy(Dimension::Xy)] + #[case::xyz(Dimension::Xyz)] + #[case::xym(Dimension::Xym)] + #[case::xyzm(Dimension::Xyzm)] + fn point_validates_every_dimension(#[case] dim: Dimension) -> VortexResult<()> { + let storage = coordinate_storage_dtype(dim, Nullability::NonNullable); + ExtDType::::try_new(geo_meta(), storage)?; Ok(()) } @@ -138,19 +274,7 @@ mod tests { let session = VortexSession::empty().with::(); let mut ctx = session.create_execution_ctx(); - let storage = StructArray::from_fields(&[ - ( - "x", - PrimitiveArray::from_iter(vec![1.0f64, -111.7610]).into_array(), - ), - ( - "y", - PrimitiveArray::from_iter(vec![2.0f64, 34.8697]).into_array(), - ), - ])? - .into_array(); - let dtype = ExtDType::::try_new(geo_meta(), storage.dtype().clone())?; - let points = ExtensionArray::new(dtype.erased(), storage).into_array(); + let points = point_column(vec![1.0, -111.7610], vec![2.0, 34.8697])?; assert_eq!( coordinate_from_scalar(&points.execute_scalar(0, &mut ctx)?)?, diff --git a/vortex-geo/src/extension/wkb.rs b/vortex-geo/src/extension/wkb.rs index 172da41b93b..5491d8673d7 100644 --- a/vortex-geo/src/extension/wkb.rs +++ b/vortex-geo/src/extension/wkb.rs @@ -12,8 +12,6 @@ use arrow_schema::extension::ExtensionType; use geoarrow::array::GenericWkbArray; use geoarrow::array::IntoArrow; use geoarrow::array::WkbViewArray; -use geoarrow::datatypes::Crs; -use geoarrow::datatypes::Metadata; use geoarrow::datatypes::WkbType; use prost::Message; use vortex_array::ArrayRef; @@ -35,7 +33,6 @@ use vortex_array::dtype::extension::ExtVTable; use vortex_array::scalar::ScalarValue; use vortex_error::VortexError; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_session::registry::CachedId; @@ -43,6 +40,8 @@ use vortex_session::registry::Id; use wkb::reader::GeometryType; use crate::extension::GeoMetadata; +use crate::extension::geo_metadata_from_arrow; +use crate::extension::geoarrow_metadata; /// A typed handle to an [`ExtensionArray`] that contains WKB-encoded data. /// @@ -71,9 +70,11 @@ impl TryFrom for WellKnownBinaryData { type Error = VortexError; fn try_from(ext: ExtensionArray) -> Result { - if !ext.ext_dtype().is::() { - vortex_bail!("array extension dtype {} is not a WKB", ext.ext_dtype()); - } + vortex_ensure!( + ext.ext_dtype().is::(), + "array extension dtype {} is not a WKB", + ext.ext_dtype() + ); Ok(Self { ext }) } @@ -292,26 +293,9 @@ impl ArrowImportVTable for WellKnownBinary { } fn wkb_type(geo_metadata: &GeoMetadata) -> WkbType { - let metadata = Metadata::new( - geo_metadata - .crs - .as_ref() - .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) - .unwrap_or_default(), - None, - ); - WkbType::new(Arc::new(metadata)) + WkbType::new(geoarrow_metadata(geo_metadata)) } fn geo_metadata(wkb_type: &WkbType) -> GeoMetadata { - let crs = wkb_type.metadata().crs().crs_value().map(|value| { - // `Crs::from_unknown_crs_type` stores the user's string verbatim as a JSON string - // value, so prefer the raw string when available to round-trip cleanly. For other - // CRS encodings (PROJJSON object, etc.), fall back to the JSON-encoded form. - value - .as_str() - .map(str::to_string) - .unwrap_or_else(|| value.to_string()) - }); - GeoMetadata { crs } + geo_metadata_from_arrow(wkb_type.metadata()) } diff --git a/vortex-geo/src/lib.rs b/vortex-geo/src/lib.rs index 9d0cde26f9e..d47b0976bc6 100644 --- a/vortex-geo/src/lib.rs +++ b/vortex-geo/src/lib.rs @@ -14,6 +14,11 @@ use crate::scalar_fn::distance::GeoDistance; pub mod extension; pub mod scalar_fn; +#[cfg(test)] +mod test_harness; +#[cfg(test)] +mod tests; + /// Set up a session with support for geospatial extension types, encodings and layouts. pub fn initialize(session: &VortexSession) { // Register the geospatial extension types. @@ -21,311 +26,9 @@ pub fn initialize(session: &VortexSession) { session.arrow().register_exporter(Arc::new(WellKnownBinary)); session.arrow().register_importer(Arc::new(WellKnownBinary)); session.dtypes().register(Point); + session.arrow().register_exporter(Arc::new(Point)); + session.arrow().register_importer(Arc::new(Point)); // Register the geometry scalar functions. session.scalar_fns().register(GeoDistance); } - -#[cfg(test)] -mod tests { - use std::sync::Arc; - use std::sync::LazyLock; - - use arrow_array::Array as _; - use arrow_array::ArrayRef as ArrowArrayRef; - use arrow_array::BinaryArray; - use arrow_array::BinaryViewArray; - use arrow_array::LargeBinaryArray; - use arrow_array::cast::AsArray; - use arrow_schema::DataType; - use arrow_schema::Field; - use arrow_schema::extension::ExtensionType as _; - use geo_traits::to_geo::ToGeoGeometry; - use geo_types::Coord; - use geo_types::Geometry; - use geo_types::LineString; - use geo_types::Polygon; - use geoarrow::datatypes::Crs; - use geoarrow::datatypes::Metadata; - use geoarrow::datatypes::WkbType; - use vortex_array::IntoArray; - use vortex_array::VortexSessionExecute; - use vortex_array::arrays::ExtensionArray; - use vortex_array::arrays::varbin::builder::VarBinBuilder; - use vortex_array::arrow::ArrowSessionExt; - use vortex_array::dtype::DType; - use vortex_array::dtype::Nullability; - use vortex_array::dtype::extension::ExtDType; - use vortex_array::dtype::extension::ExtVTable; - use vortex_array::session::ArraySession; - use vortex_error::VortexResult; - use vortex_error::vortex_err; - use vortex_session::VortexSession; - use wkb::writer::WriteOptions; - - use crate::extension::GeoMetadata; - use crate::extension::WellKnownBinary; - - static SESSION: LazyLock = LazyLock::new(|| { - let session = VortexSession::empty().with::(); - crate::initialize(&session); - session - }); - - #[test] - fn test_array() -> VortexResult<()> { - let mut execution_ctx = SESSION.create_execution_ctx(); - - let mut buf = Vec::new(); - - let mut builder = VarBinBuilder::::with_capacity(3); - - let polygon = Geometry::Polygon(Polygon::new( - LineString::new(vec![ - Coord::zero(), - Coord { x: 100.0, y: 0.0 }, - Coord { x: 100.0, y: 100.0 }, - Coord { x: 0.0, y: 100.0 }, - Coord::zero(), - ]), - vec![], - )); - - // We should always prefer to write little-endian, which is the default option. - wkb::writer::write_geometry(&mut buf, &polygon, &WriteOptions::default()) - .map_err(|e| vortex_err!("writing WKB failed: {e}"))?; - - // Push same polygon 3 times - builder.append_value(&buf); - builder.append_value(&buf); - builder.append_value(&buf); - - let dtype = ExtDType::::try_new( - GeoMetadata { - crs: Some("EPSG:4326".to_string()), - }, - DType::Binary(Nullability::NonNullable), - )?; - - let array = builder.finish(DType::Binary(Nullability::NonNullable)); - let array = ExtensionArray::new(dtype.clone().erased(), array.into_array()).into_array(); - - for idx in 0..3 { - let geom = array.execute_scalar(idx, &mut execution_ctx)?; - let wkb = WellKnownBinary::unpack_native(&dtype, geom.value().unwrap())?; - - assert_eq!(wkb.to_geometry(), polygon); - } - - Ok(()) - } - - fn wkb_extension_array() -> VortexResult<(Vec, vortex_array::ArrayRef)> { - let polygon = Geometry::Polygon(Polygon::new( - LineString::new(vec![ - Coord::zero(), - Coord { x: 100.0, y: 0.0 }, - Coord { x: 100.0, y: 100.0 }, - Coord { x: 0.0, y: 100.0 }, - Coord::zero(), - ]), - vec![], - )); - let mut buf = Vec::new(); - wkb::writer::write_geometry(&mut buf, &polygon, &WriteOptions::default()) - .map_err(|e| vortex_err!("writing WKB failed: {e}"))?; - - let mut builder = VarBinBuilder::::with_capacity(3); - builder.append_value(&buf); - builder.append_value(&buf); - builder.append_value(&buf); - - let dtype = ExtDType::::try_new( - GeoMetadata { - crs: Some("EPSG:4326".to_string()), - }, - DType::Binary(Nullability::NonNullable), - )?; - let storage = builder.finish(DType::Binary(Nullability::NonNullable)); - let array = ExtensionArray::new(dtype.erased(), storage.into_array()).into_array(); - Ok((buf, array)) - } - - #[test] - fn export_arrow_field_carries_wkb_extension() -> VortexResult<()> { - let (_, array) = wkb_extension_array()?; - let field = SESSION.arrow().to_arrow_field("geom", array.dtype())?; - assert_eq!(field.extension_type_name(), Some(WkbType::NAME)); - // Vortex's canonical mapping of `DType::Binary` is Arrow's `BinaryView`. - assert_eq!(field.data_type(), &DataType::BinaryView); - Ok(()) - } - - #[test] - fn execute_arrow_exports_wkb_to_binary() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, array) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::Binary, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(array, Some(&field), &mut ctx)?; - - assert_eq!(exported.data_type(), &DataType::Binary); - let binary = exported.as_binary::(); - assert_eq!(binary.len(), 3); - for idx in 0..3 { - assert_eq!(binary.value(idx), wkb_bytes.as_slice()); - } - Ok(()) - } - - #[test] - fn execute_arrow_exports_wkb_to_large_binary() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, array) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::LargeBinary, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(array, Some(&field), &mut ctx)?; - - assert_eq!(exported.data_type(), &DataType::LargeBinary); - let binary = exported.as_binary::(); - assert_eq!(binary.len(), 3); - for idx in 0..3 { - assert_eq!(binary.value(idx), wkb_bytes.as_slice()); - } - Ok(()) - } - - #[test] - fn execute_arrow_exports_wkb_to_binary_view() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, array) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::BinaryView, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(array, Some(&field), &mut ctx)?; - - assert_eq!(exported.data_type(), &DataType::BinaryView); - let binary = exported.as_binary_view(); - assert_eq!(binary.len(), 3); - for idx in 0..3 { - assert_eq!(binary.value(idx), wkb_bytes.as_slice()); - } - Ok(()) - } - - fn wkb_field(name: &str, data_type: DataType, nullable: bool, crs: Option<&str>) -> Field { - let crs = crs - .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) - .unwrap_or_default(); - let wkb_type = WkbType::new(Arc::new(Metadata::new(crs, None))); - Field::new(name, data_type, nullable).with_extension_type(wkb_type) - } - - fn assert_imported_wkb_dtype(dtype: &DType, expected_crs: Option<&str>, nullable: bool) { - let DType::Extension(ext) = dtype else { - panic!("expected Extension dtype, got {dtype}"); - }; - assert!(ext.is::()); - assert_eq!(ext.storage_dtype(), &DType::Binary(nullable.into())); - let geo = ext.metadata::(); - assert_eq!(geo.crs.as_deref(), expected_crs); - } - - #[test] - fn import_arrow_field_recovers_wkb_extension() -> VortexResult<()> { - let field = wkb_field("geom", DataType::Binary, false, Some("EPSG:4326")); - let dtype = SESSION.arrow().from_arrow_field(&field)?; - assert_imported_wkb_dtype(&dtype, Some("EPSG:4326"), false); - Ok(()) - } - - #[test] - fn import_arrow_field_without_crs() -> VortexResult<()> { - let field = wkb_field("geom", DataType::BinaryView, true, None); - let dtype = SESSION.arrow().from_arrow_field(&field)?; - assert_imported_wkb_dtype(&dtype, None, true); - Ok(()) - } - - #[test] - fn import_arrow_array_from_binary() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, _) = wkb_extension_array()?; - let arrow: ArrowArrayRef = Arc::new(BinaryArray::from_iter_values([ - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - ])); - let field = wkb_field("geom", DataType::Binary, false, Some("EPSG:4326")); - - let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; - assert_imported_wkb_dtype(imported.dtype(), Some("EPSG:4326"), false); - - for idx in 0..3 { - let geom = imported.execute_scalar(idx, &mut ctx)?; - assert_eq!(geom.value().unwrap().as_binary().as_slice(), wkb_bytes); - } - Ok(()) - } - - #[test] - fn import_arrow_array_from_large_binary() -> VortexResult<()> { - let (wkb_bytes, _) = wkb_extension_array()?; - let arrow: ArrowArrayRef = Arc::new(LargeBinaryArray::from_iter_values([ - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - ])); - let field = wkb_field("geom", DataType::LargeBinary, false, Some("EPSG:4326")); - - let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; - assert_imported_wkb_dtype(imported.dtype(), Some("EPSG:4326"), false); - assert_eq!(imported.len(), 2); - Ok(()) - } - - #[test] - fn import_arrow_array_from_binary_view() -> VortexResult<()> { - let (wkb_bytes, _) = wkb_extension_array()?; - let arrow: ArrowArrayRef = Arc::new(BinaryViewArray::from_iter_values([ - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - wkb_bytes.as_slice(), - ])); - let field = wkb_field("geom", DataType::BinaryView, false, None); - - let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; - assert_imported_wkb_dtype(imported.dtype(), None, false); - assert_eq!(imported.len(), 4); - Ok(()) - } - - #[test] - fn import_arrow_array_roundtrips_through_export() -> VortexResult<()> { - let mut ctx = SESSION.create_execution_ctx(); - let (wkb_bytes, original) = wkb_extension_array()?; - - let field = Field::new("geom", DataType::BinaryView, false) - .with_extension_type(WkbType::new(Default::default())); - let exported = SESSION - .arrow() - .execute_arrow(original, Some(&field), &mut ctx)?; - let arrow_field = wkb_field("geom", DataType::BinaryView, false, Some("EPSG:4326")); - let reimported = SESSION.arrow().from_arrow_array(exported, &arrow_field)?; - - assert_imported_wkb_dtype(reimported.dtype(), Some("EPSG:4326"), false); - for idx in 0..3 { - let geom = reimported.execute_scalar(idx, &mut ctx)?; - assert_eq!(geom.value().unwrap().as_binary().as_slice(), wkb_bytes); - } - Ok(()) - } -} diff --git a/vortex-geo/src/scalar_fn/distance.rs b/vortex-geo/src/scalar_fn/distance.rs index e621c4bbad1..c65f1ab78dc 100644 --- a/vortex-geo/src/scalar_fn/distance.rs +++ b/vortex-geo/src/scalar_fn/distance.rs @@ -153,32 +153,13 @@ mod tests { use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; use vortex_array::arrays::ConstantArray; - use vortex_array::arrays::ExtensionArray; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::arrays::StructArray; - use vortex_array::dtype::extension::ExtDType; use vortex_array::session::ArraySession; use vortex_error::VortexResult; use vortex_session::VortexSession; use super::GeoDistance; use super::euclidean_distance; - use crate::extension::GeoMetadata; - use crate::extension::Point; - - /// A `Point` column (CRS `EPSG:4326`) over the given x/y coordinates. - fn point_column(xs: Vec, ys: Vec) -> VortexResult { - let storage = StructArray::from_fields(&[ - ("x", PrimitiveArray::from_iter(xs).into_array()), - ("y", PrimitiveArray::from_iter(ys).into_array()), - ])? - .into_array(); - let metadata = GeoMetadata { - crs: Some("EPSG:4326".to_string()), - }; - let dtype = ExtDType::::try_new(metadata, storage.dtype().clone())?; - Ok(ExtensionArray::new(dtype.erased(), storage).into_array()) - } + use crate::test_harness::point_column; /// A constant `Point` column of length `len`, every row at `(x, y)`. fn point_constant( diff --git a/vortex-geo/src/test_harness.rs b/vortex-geo/src/test_harness.rs new file mode 100644 index 00000000000..9f066fb4d14 --- /dev/null +++ b/vortex-geo/src/test_harness.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Shared test helpers for the geospatial extension types. + +use vortex_array::ArrayRef; +use vortex_array::IntoArray; +use vortex_array::arrays::ExtensionArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::StructArray; +use vortex_array::dtype::extension::ExtDType; +use vortex_error::VortexResult; + +use crate::extension::GeoMetadata; +use crate::extension::Point; + +/// A `Point` column (CRS `EPSG:4326`) over the given x/y coordinates. +pub(crate) fn point_column(xs: Vec, ys: Vec) -> VortexResult { + let storage = StructArray::from_fields(&[ + ("x", PrimitiveArray::from_iter(xs).into_array()), + ("y", PrimitiveArray::from_iter(ys).into_array()), + ])? + .into_array(); + let metadata = GeoMetadata { + crs: Some("EPSG:4326".to_string()), + }; + let dtype = ExtDType::::try_new(metadata, storage.dtype().clone())?; + Ok(ExtensionArray::new(dtype.erased(), storage).into_array()) +} diff --git a/vortex-geo/src/tests/mod.rs b/vortex-geo/src/tests/mod.rs new file mode 100644 index 00000000000..86a89a60b5b --- /dev/null +++ b/vortex-geo/src/tests/mod.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Arrow interop tests for the geospatial extension types, exercising the session wiring set up +//! by [`crate::initialize`]. + +mod point; +mod wkb; + +use std::sync::LazyLock; + +use vortex_array::session::ArraySession; +use vortex_session::VortexSession; + +/// A session with the geospatial types and functions registered. +static SESSION: LazyLock = LazyLock::new(|| { + let session = VortexSession::empty().with::(); + crate::initialize(&session); + session +}); diff --git a/vortex-geo/src/tests/point.rs b/vortex-geo/src/tests/point.rs new file mode 100644 index 00000000000..074ef97f0d3 --- /dev/null +++ b/vortex-geo/src/tests/point.rs @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Arrow interop for the `vortex.geo.point` extension type (`geoarrow.point`). + +use std::sync::Arc; + +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_array::Float64Array; +use arrow_array::StructArray as ArrowStructArray; +use arrow_array::cast::AsArray; +use arrow_array::types::Float64Type; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::Fields; +use arrow_schema::extension::ExtensionType as _; +use geoarrow::datatypes::CoordType; +use geoarrow::datatypes::Crs; +use geoarrow::datatypes::Dimension as GeoArrowDimension; +use geoarrow::datatypes::Metadata; +use geoarrow::datatypes::PointType; +use vortex_array::VortexSessionExecute; +use vortex_array::arrow::ArrowSessionExt; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_error::VortexResult; +use vortex_error::vortex_err; + +use super::SESSION; +use crate::extension::Point; +use crate::extension::coordinate::Coordinate; +use crate::extension::coordinate::coordinate_from_scalar; +use crate::test_harness::point_column; + +/// A `geoarrow.point` Arrow field with separated (struct) XY coordinates. +fn point_field(name: &str, nullable: bool, crs: Option<&str>) -> Field { + let crs = crs + .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) + .unwrap_or_default(); + let metadata = Arc::new(Metadata::new(crs, None)); + PointType::new(GeoArrowDimension::XY, metadata).to_field(name, nullable) +} + +/// An Arrow `Struct` point array with non-nullable `Float64` children. +fn arrow_point_struct(xs: Vec, ys: Vec) -> ArrowStructArray { + let fields: Fields = vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ] + .into(); + ArrowStructArray::new( + fields, + vec![ + Arc::new(Float64Array::from(xs)) as ArrowArrayRef, + Arc::new(Float64Array::from(ys)), + ], + None, + ) +} + +/// The exported Arrow field carries the `geoarrow.point` extension over the separated +/// `Struct` coordinate layout. +#[test] +fn export_field_carries_extension() -> VortexResult<()> { + let array = point_column(vec![1.0], vec![2.0])?; + let field = SESSION.arrow().to_arrow_field("loc", array.dtype())?; + + assert_eq!(field.extension_type_name(), Some(PointType::NAME)); + let DataType::Struct(fields) = field.data_type() else { + panic!("expected Struct, got {}", field.data_type()); + }; + assert_eq!(fields.len(), 2); + assert_eq!(fields[0].name(), "x"); + assert_eq!(fields[0].data_type(), &DataType::Float64); + assert_eq!(fields[1].name(), "y"); + assert_eq!(fields[1].data_type(), &DataType::Float64); + Ok(()) +} + +/// Export materializes the point column as an Arrow struct with the original ordinates. +#[test] +fn exports_to_struct() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let array = point_column(vec![1.0, 3.0], vec![2.0, 4.0])?; + + let target = point_field("loc", false, Some("EPSG:4326")); + let exported = SESSION + .arrow() + .execute_arrow(array, Some(&target), &mut ctx)?; + + let points = exported.as_struct(); + let ordinates = |name: &str| -> VortexResult> { + Ok(points + .column_by_name(name) + .ok_or_else(|| vortex_err!("missing {name} column"))? + .as_primitive::() + .values() + .to_vec()) + }; + assert_eq!(ordinates("x")?, vec![1.0, 3.0]); + assert_eq!(ordinates("y")?, vec![2.0, 4.0]); + Ok(()) +} + +/// An imported `geoarrow.point` field maps to the Point extension dtype, recovering the +/// CRS, coordinate field names, and nullability. +#[test] +fn import_field_recovers_extension() -> VortexResult<()> { + let field = point_field("loc", true, Some("EPSG:4326")); + let dtype = SESSION.arrow().from_arrow_field(&field)?; + + let DType::Extension(ext) = &dtype else { + panic!("expected Extension dtype, got {dtype}"); + }; + assert!(ext.is::()); + assert_eq!(ext.metadata::().crs.as_deref(), Some("EPSG:4326")); + + let DType::Struct(fields, nullability) = ext.storage_dtype() else { + panic!("expected Struct storage, got {}", ext.storage_dtype()); + }; + assert_eq!(*nullability, Nullability::Nullable); + let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect(); + assert_eq!(names, vec!["x", "y"]); + Ok(()) +} + +/// A field with interleaved (`FixedSizeList`) coordinates fails to import. +#[test] +fn import_interleaved_field_fails() { + let point_type = PointType::new(GeoArrowDimension::XY, Default::default()) + .with_coord_type(CoordType::Interleaved); + let field = point_type.to_field("loc", false); + assert!(SESSION.arrow().from_arrow_field(&field).is_err()); +} + +/// Import wraps the Arrow struct's coordinate buffers into a Point column. +#[test] +fn imports_from_struct() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let arrow: ArrowArrayRef = + Arc::new(arrow_point_struct(vec![1.0, -111.7610], vec![2.0, 34.8697])); + let field = point_field("loc", false, Some("EPSG:4326")); + + let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; + assert!( + imported + .dtype() + .as_extension_opt() + .map(|ext| ext.is::()) + .unwrap_or(false) + ); + + assert_eq!( + coordinate_from_scalar(&imported.execute_scalar(0, &mut ctx)?)?, + Coordinate::xy(1.0, 2.0) + ); + assert_eq!( + coordinate_from_scalar(&imported.execute_scalar(1, &mut ctx)?)?, + Coordinate::xy(-111.7610, 34.8697) + ); + Ok(()) +} + +/// A point column exported to Arrow and imported back is unchanged, including the CRS. +#[test] +fn roundtrips_through_arrow() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let original = point_column(vec![0.0, 3.0], vec![4.0, 0.0])?; + + let target = point_field("loc", false, Some("EPSG:4326")); + let exported = SESSION + .arrow() + .execute_arrow(original, Some(&target), &mut ctx)?; + let reimported = SESSION.arrow().from_arrow_array(exported, &target)?; + + let ext = reimported + .dtype() + .as_extension_opt() + .ok_or_else(|| vortex_err!("expected Extension dtype"))?; + assert_eq!(ext.metadata::().crs.as_deref(), Some("EPSG:4326")); + + assert_eq!( + coordinate_from_scalar(&reimported.execute_scalar(0, &mut ctx)?)?, + Coordinate::xy(0.0, 4.0) + ); + assert_eq!( + coordinate_from_scalar(&reimported.execute_scalar(1, &mut ctx)?)?, + Coordinate::xy(3.0, 0.0) + ); + Ok(()) +} diff --git a/vortex-geo/src/tests/wkb.rs b/vortex-geo/src/tests/wkb.rs new file mode 100644 index 00000000000..4ef3d6e2ba8 --- /dev/null +++ b/vortex-geo/src/tests/wkb.rs @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Arrow interop for the `vortex.geo.wkb` extension type (`geoarrow.wkb`). + +use std::sync::Arc; + +use arrow_array::Array as _; +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_array::BinaryArray; +use arrow_array::BinaryViewArray; +use arrow_array::LargeBinaryArray; +use arrow_array::cast::AsArray; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::extension::ExtensionType as _; +use geo_traits::to_geo::ToGeoGeometry; +use geo_types::Coord; +use geo_types::Geometry; +use geo_types::LineString; +use geo_types::Polygon; +use geoarrow::datatypes::Crs; +use geoarrow::datatypes::Metadata; +use geoarrow::datatypes::WkbType; +use rstest::rstest; +use vortex_array::IntoArray; +use vortex_array::VortexSessionExecute; +use vortex_array::arrays::ExtensionArray; +use vortex_array::arrays::varbin::builder::VarBinBuilder; +use vortex_array::arrow::ArrowSessionExt; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::dtype::extension::ExtDType; +use vortex_array::dtype::extension::ExtVTable; +use vortex_error::VortexResult; +use vortex_error::vortex_err; +use wkb::writer::WriteOptions; + +use super::SESSION; +use crate::extension::GeoMetadata; +use crate::extension::WellKnownBinary; + +/// The polygon geometry encoded by these tests. +fn test_polygon() -> Geometry { + Geometry::Polygon(Polygon::new( + LineString::new(vec![ + Coord::zero(), + Coord { x: 100.0, y: 0.0 }, + Coord { x: 100.0, y: 100.0 }, + Coord { x: 0.0, y: 100.0 }, + Coord::zero(), + ]), + vec![], + )) +} + +/// A WKB column (CRS `EPSG:4326`) holding [`test_polygon`] three times, along with the +/// polygon's WKB bytes. +fn wkb_extension_array() -> VortexResult<(Vec, vortex_array::ArrayRef)> { + let mut buf = Vec::new(); + // We should always prefer to write little-endian, which is the default option. + wkb::writer::write_geometry(&mut buf, &test_polygon(), &WriteOptions::default()) + .map_err(|e| vortex_err!("writing WKB failed: {e}"))?; + + let mut builder = VarBinBuilder::::with_capacity(3); + builder.append_value(&buf); + builder.append_value(&buf); + builder.append_value(&buf); + + let dtype = ExtDType::::try_new( + GeoMetadata { + crs: Some("EPSG:4326".to_string()), + }, + DType::Binary(Nullability::NonNullable), + )?; + let storage = builder.finish(DType::Binary(Nullability::NonNullable)); + let array = ExtensionArray::new(dtype.erased(), storage.into_array()).into_array(); + Ok((buf, array)) +} + +/// A `geoarrow.wkb` Arrow field over the given binary data type. +fn wkb_field(name: &str, data_type: DataType, nullable: bool, crs: Option<&str>) -> Field { + let crs = crs + .map(|crs| Crs::from_unknown_crs_type(crs.to_string())) + .unwrap_or_default(); + let wkb_type = WkbType::new(Arc::new(Metadata::new(crs, None))); + Field::new(name, data_type, nullable).with_extension_type(wkb_type) +} + +fn assert_imported_wkb_dtype(dtype: &DType, expected_crs: Option<&str>, nullable: bool) { + let DType::Extension(ext) = dtype else { + panic!("expected Extension dtype, got {dtype}"); + }; + assert!(ext.is::()); + assert_eq!(ext.storage_dtype(), &DType::Binary(nullable.into())); + let geo = ext.metadata::(); + assert_eq!(geo.crs.as_deref(), expected_crs); +} + +/// WKB scalars unpack back to the geometry they encode. +#[test] +fn scalar_unpacks_to_geometry() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (_, array) = wkb_extension_array()?; + + let dtype = ExtDType::::try_new( + GeoMetadata { + crs: Some("EPSG:4326".to_string()), + }, + DType::Binary(Nullability::NonNullable), + )?; + + for idx in 0..3 { + let geom = array.execute_scalar(idx, &mut ctx)?; + let wkb = WellKnownBinary::unpack_native(&dtype, geom.value().unwrap())?; + assert_eq!(wkb.to_geometry(), test_polygon()); + } + Ok(()) +} + +/// The exported Arrow field carries the `geoarrow.wkb` extension over Vortex's canonical +/// binary mapping (`BinaryView`). +#[test] +fn export_field_carries_extension() -> VortexResult<()> { + let (_, array) = wkb_extension_array()?; + let field = SESSION.arrow().to_arrow_field("geom", array.dtype())?; + assert_eq!(field.extension_type_name(), Some(WkbType::NAME)); + assert_eq!(field.data_type(), &DataType::BinaryView); + Ok(()) +} + +/// Export materializes the WKB bytes into the requested binary-family Arrow array. +#[rstest] +#[case::binary(DataType::Binary)] +#[case::large_binary(DataType::LargeBinary)] +#[case::binary_view(DataType::BinaryView)] +fn exports_to_binary_family(#[case] data_type: DataType) -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (wkb_bytes, array) = wkb_extension_array()?; + + let field = Field::new("geom", data_type.clone(), false) + .with_extension_type(WkbType::new(Default::default())); + let exported = SESSION + .arrow() + .execute_arrow(array, Some(&field), &mut ctx)?; + + assert_eq!(exported.data_type(), &data_type); + let values: Vec<&[u8]> = match &data_type { + DataType::Binary => exported.as_binary::().iter().flatten().collect(), + DataType::LargeBinary => exported.as_binary::().iter().flatten().collect(), + DataType::BinaryView => exported.as_binary_view().iter().flatten().collect(), + _ => unreachable!("cases cover only the binary family"), + }; + assert_eq!(values.len(), 3); + for value in values { + assert_eq!(value, wkb_bytes.as_slice()); + } + Ok(()) +} + +/// An imported `geoarrow.wkb` field maps to the WKB extension dtype, recovering the CRS. +#[test] +fn import_field_recovers_extension() -> VortexResult<()> { + let field = wkb_field("geom", DataType::Binary, false, Some("EPSG:4326")); + let dtype = SESSION.arrow().from_arrow_field(&field)?; + assert_imported_wkb_dtype(&dtype, Some("EPSG:4326"), false); + Ok(()) +} + +/// A `geoarrow.wkb` field without a CRS imports as an unreferenced geometry. +#[test] +fn import_field_without_crs() -> VortexResult<()> { + let field = wkb_field("geom", DataType::BinaryView, true, None); + let dtype = SESSION.arrow().from_arrow_field(&field)?; + assert_imported_wkb_dtype(&dtype, None, true); + Ok(()) +} + +/// Import wraps the binary-family Arrow array's WKB values unchanged. +#[test] +fn imports_from_binary() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (wkb_bytes, _) = wkb_extension_array()?; + let arrow: ArrowArrayRef = Arc::new(BinaryArray::from_iter_values([ + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + ])); + let field = wkb_field("geom", DataType::Binary, false, Some("EPSG:4326")); + + let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; + assert_imported_wkb_dtype(imported.dtype(), Some("EPSG:4326"), false); + + for idx in 0..3 { + let geom = imported.execute_scalar(idx, &mut ctx)?; + assert_eq!(geom.value().unwrap().as_binary().as_slice(), wkb_bytes); + } + Ok(()) +} + +/// Import wraps the binary-family Arrow array's WKB values unchanged. +#[test] +fn imports_from_large_binary() -> VortexResult<()> { + let (wkb_bytes, _) = wkb_extension_array()?; + let arrow: ArrowArrayRef = Arc::new(LargeBinaryArray::from_iter_values([ + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + ])); + let field = wkb_field("geom", DataType::LargeBinary, false, Some("EPSG:4326")); + + let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; + assert_imported_wkb_dtype(imported.dtype(), Some("EPSG:4326"), false); + assert_eq!(imported.len(), 2); + Ok(()) +} + +/// Import wraps the binary-family Arrow array's WKB values unchanged. +#[test] +fn imports_from_binary_view() -> VortexResult<()> { + let (wkb_bytes, _) = wkb_extension_array()?; + let arrow: ArrowArrayRef = Arc::new(BinaryViewArray::from_iter_values([ + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + wkb_bytes.as_slice(), + ])); + let field = wkb_field("geom", DataType::BinaryView, false, None); + + let imported = SESSION.arrow().from_arrow_array(arrow, &field)?; + assert_imported_wkb_dtype(imported.dtype(), None, false); + assert_eq!(imported.len(), 4); + Ok(()) +} + +/// A WKB column exported to Arrow and imported back is unchanged, byte for byte. +#[test] +fn roundtrips_through_arrow() -> VortexResult<()> { + let mut ctx = SESSION.create_execution_ctx(); + let (wkb_bytes, original) = wkb_extension_array()?; + + let field = Field::new("geom", DataType::BinaryView, false) + .with_extension_type(WkbType::new(Default::default())); + let exported = SESSION + .arrow() + .execute_arrow(original, Some(&field), &mut ctx)?; + let arrow_field = wkb_field("geom", DataType::BinaryView, false, Some("EPSG:4326")); + let reimported = SESSION.arrow().from_arrow_array(exported, &arrow_field)?; + + assert_imported_wkb_dtype(reimported.dtype(), Some("EPSG:4326"), false); + for idx in 0..3 { + let geom = reimported.execute_scalar(idx, &mut ctx)?; + assert_eq!(geom.value().unwrap().as_binary().as_slice(), wkb_bytes); + } + Ok(()) +}