Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions vortex-geo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ wkb = { workspace = true }
[dev-dependencies]
geo-traits = { workspace = true }
geo-types = { workspace = true }
rstest = { workspace = true }

[lints]
workspace = true
88 changes: 78 additions & 10 deletions vortex-geo/src/extension/coordinate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
use std::fmt::Display;
use std::fmt::Formatter;

use geoarrow::datatypes::Dimension as GeoArrowDimension;
use vortex_array::ArrayRef;
use vortex_array::ExecutionCtx;
use vortex_array::arrays::ExtensionArray;
Expand All @@ -25,6 +26,7 @@ use vortex_array::dtype::DType;
use vortex_array::dtype::FieldNames;
use vortex_array::dtype::Nullability;
use vortex_array::dtype::PType;
use vortex_array::dtype::StructFields;
use vortex_array::scalar::Scalar;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
Expand Down Expand Up @@ -63,6 +65,38 @@ impl Dimension {
_ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"),
})
}

/// Names of the coordinate fields that make up this dimension, listed in GeoArrow order
/// (`x`, `y`, then `z` and/or `m` when the dimension carries them).
pub(crate) fn field_names(self) -> &'static [&'static str] {
    const XY: &[&str] = &["x", "y"];
    const XYZ: &[&str] = &["x", "y", "z"];
    const XYM: &[&str] = &["x", "y", "m"];
    const XYZM: &[&str] = &["x", "y", "z", "m"];

    match self {
        Dimension::Xy => XY,
        Dimension::Xyz => XYZ,
        Dimension::Xym => XYM,
        Dimension::Xyzm => XYZM,
    }
}
}

/// Maps each GeoArrow dimension onto the crate's own [`Dimension`] variant of the same shape.
impl From<GeoArrowDimension> for Dimension {
    fn from(geoarrow_dim: GeoArrowDimension) -> Self {
        match geoarrow_dim {
            GeoArrowDimension::XY => Self::Xy,
            GeoArrowDimension::XYZ => Self::Xyz,
            GeoArrowDimension::XYM => Self::Xym,
            GeoArrowDimension::XYZM => Self::Xyzm,
        }
    }
}

/// Maps each crate-local [`Dimension`] onto the GeoArrow dimension of the same shape.
impl From<Dimension> for GeoArrowDimension {
    fn from(vortex_dim: Dimension) -> Self {
        match vortex_dim {
            Dimension::Xy => Self::XY,
            Dimension::Xyz => Self::XYZ,
            Dimension::Xym => Self::XYM,
            Dimension::Xyzm => Self::XYZM,
        }
    }
}

/// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them.
Expand Down Expand Up @@ -122,6 +156,21 @@ pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult<Dimension> {
Dimension::from_field_names(fields.names())
}

/// Build the canonical storage dtype for `dim`.
///
/// The result is a `Struct` with one non-nullable `f64` field per name in
/// [`Dimension::field_names`]; `nullability` is applied to the struct itself (i.e. per point),
/// never to the individual coordinate fields. Inverse of [`coordinate_dimension`].
pub(crate) fn coordinate_storage_dtype(dim: Dimension, nullability: Nullability) -> DType {
    let names = dim.field_names();
    // Every coordinate field shares the same dtype, so clone one template per field name.
    let coordinate_dtype = DType::Primitive(PType::F64, Nullability::NonNullable);
    let field_dtypes = vec![coordinate_dtype; names.len()];
    let struct_fields = StructFields::new(FieldNames::from(names), field_dtypes);
    DType::Struct(struct_fields, nullability)
}

/// Decode a [`Coordinate`] from a coordinate `Struct<x, y[, z][, m]>` scalar (`z`/`m` read iff
/// present, so the same decoder serves every dimension).
pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult<Coordinate> {
Expand Down Expand Up @@ -196,39 +245,58 @@ pub(crate) fn parse_storage(

#[cfg(test)]
mod tests {
use rstest::rstest;
use vortex_array::IntoArray;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::StructArray;
use vortex_array::dtype::FieldNames;
use vortex_array::dtype::Nullability;
use vortex_array::dtype::extension::ExtDType;
use vortex_array::session::ArraySession;
use vortex_array::validity::Validity;
use vortex_error::VortexResult;
use vortex_session::VortexSession;

use super::Coordinate;
use super::Dimension;
use super::coordinate_dimension;
use super::coordinate_storage_dtype;
use super::parse_storage;
use crate::extension::GeoMetadata;
use crate::extension::Point;

/// Every dimension reports the expected field names, and the canonical storage dtype built
/// from it maps back to the same dimension.
#[rstest]
#[case::xy(Dimension::Xy, &["x", "y"])]
#[case::xyz(Dimension::Xyz, &["x", "y", "z"])]
#[case::xym(Dimension::Xym, &["x", "y", "m"])]
#[case::xyzm(Dimension::Xyzm, &["x", "y", "z", "m"])]
fn storage_dtype_roundtrips_dimension(
    #[case] dim: Dimension,
    #[case] names: &[&str],
) -> VortexResult<()> {
    // Dimension -> field names.
    assert_eq!(dim.field_names(), names);

    // Dimension -> storage dtype -> dimension.
    let storage = coordinate_storage_dtype(dim, Nullability::NonNullable);
    let recovered = coordinate_dimension(&storage)?;
    assert_eq!(recovered, dim);
    Ok(())
}

/// [`Coordinate`]'s `Display` output is WKT: a bare `POINT(x y)` when neither `z` nor `m` is
/// set, and a `Z` / `M` / `ZM` tag followed by the extra ordinates when they are present.
#[test]
fn display_is_wkt() {
let coordinate = |z, m| Coordinate {
#[rstest]
#[case::xy(None, None, "POINT(1 2)")]
#[case::xyz(Some(3.0), None, "POINT Z (1 2 3)")]
#[case::xym(None, Some(4.0), "POINT M (1 2 4)")]
#[case::xyzm(Some(3.0), Some(4.0), "POINT ZM (1 2 3 4)")]
fn display_is_wkt(#[case] z: Option<f64>, #[case] m: Option<f64>, #[case] expected: &str) {
let coordinate = Coordinate {
x: 1.0,
y: 2.0,
z,
m,
};
assert_eq!(coordinate(None, None).to_string(), "POINT(1 2)");
assert_eq!(coordinate(Some(3.0), None).to_string(), "POINT Z (1 2 3)");
assert_eq!(coordinate(None, Some(4.0)).to_string(), "POINT M (1 2 4)");
assert_eq!(
coordinate(Some(3.0), Some(4.0)).to_string(),
"POINT ZM (1 2 3 4)"
);
assert_eq!(coordinate.to_string(), expected);
}

/// [`parse_storage`] reads the coordinate fields unmasked, so a nullable point column must
Expand Down
29 changes: 29 additions & 0 deletions vortex-geo/src/extension/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ mod point;
mod wkb;

use std::fmt::Display;
use std::sync::Arc;

use geoarrow::datatypes::Crs;
use geoarrow::datatypes::Metadata;
pub use point::*;
pub use wkb::*;

Expand All @@ -30,6 +33,32 @@ impl Display for GeoMetadata {
}
}

/// Convert `geo_metadata` into the equivalent GeoArrow [`Metadata`].
///
/// A present CRS string is wrapped via [`Crs::from_unknown_crs_type`]; an absent one becomes
/// the default [`Crs`]. The second `Metadata::new` argument is always `None`.
pub(crate) fn geoarrow_metadata(geo_metadata: &GeoMetadata) -> Arc<Metadata> {
    let crs = match geo_metadata.crs.as_ref() {
        Some(crs) => Crs::from_unknown_crs_type(crs.to_string()),
        None => Crs::default(),
    };
    Arc::new(Metadata::new(crs, None))
}

/// Rebuild a [`GeoMetadata`] from GeoArrow `metadata`, carrying over its CRS value (if any).
pub(crate) fn geo_metadata_from_arrow(metadata: &Metadata) -> GeoMetadata {
    let crs = metadata.crs().crs_value().map(|value| match value.as_str() {
        // `Crs::from_unknown_crs_type` keeps the caller's string verbatim as a JSON string
        // value, so returning the raw string makes the round trip lossless.
        Some(raw) => raw.to_string(),
        // Any other CRS encoding (a PROJJSON object, etc.) is kept in its JSON-encoded form.
        None => value.to_string(),
    });
    GeoMetadata { crs }
}

#[cfg(test)]
mod tests {
use prost::Message;
Expand Down
Loading
Loading