Skip to content

Validity map with nullable datetime columns #866

@cgiachalis

Description

@cgiachalis

Datetime columns (attributes) declared as nullable should return NA for missing values when read back.

Reprex

library(tiledb)

uri <- tempfile()


domain <- tiledb_domain(tiledb_dim("row", c(0L, 100L), 100L, "INT32"))

attrib <- c(tiledb_attr("date",   type = "DATETIME_DAY", nullable = TRUE),
            tiledb_attr("datetime",   type = "DATETIME_MS", nullable = TRUE),
            tiledb_attr("nanosecs",   type = "DATETIME_NS", nullable = TRUE),
            tiledb_attr("float64",  type = "FLOAT64", nullable = TRUE))

schema <- tiledb_array_schema(domain, attrib, sparse=TRUE)
res <- tiledb_array_create(uri, schema)


df <- data.frame(row     =  1:2,
                 date    =  c(as.Date("1990-01-01"), as.Date(NA)),
                 datetime   =  c(as.POSIXct("1990-01-01"), as.POSIXct(NA)),
                 nanosecs   =  nanotime::as.nanotime(c(100, NA)),
                 float64 =  c(1, NA))
df
  row       date       datetime        nanosecs                            float64
1   1 1990-01-01   1990-01-01    1970-01-01T00:00:00.000000100+00:00       1
2   2       <NA>       <NA>            <NA>                                NA
arr <- tiledb_array(uri, return_as="data.table")
arr[] <- df
arr[]
#>      row       date                  datetime      nanosecs                             float64
#>    <int>     <Date>                    <POSc>      <nanotime>                            <num>
#> 1:     1 1990-01-01       1990-01-01 00:00:00       1970-01-01T00:00:00.000000100+00:00     1
#> 2:     2 1970-01-01      -292275055-05-16 18:21:56      <NA>                               NA

Check

 all.equal(target = df, current = arr[],  check.attributes=FALSE)
[1] "Component “date”: 'is.NA' value mismatch: 0 in current 1 in target"    
[2] "Component “datetime”: 'is.NA' value mismatch: 0 in current 1 in target"

The buffer handling in libtiledb_query_get_buffer_ptr does not apply the validity map to datetime attributes that are declared nullable,
as it does for other types:

TileDB-R/src/libtiledb.cpp

Lines 3713 to 3719 in 3760c68

} else if (dtype == "DATETIME_HR" || dtype == "DATETIME_MIN" ||
dtype == "DATETIME_SEC" || dtype == "DATETIME_MS" ||
dtype == "DATETIME_US") {
std::vector<int64_t> v(buf->ncells);
std::memcpy(&(v[0]), (void *)buf->vec.data(), buf->ncells * buf->size);
DatetimeVector dv =
int64_to_datetimes(v, _string_to_tiledb_datatype(dtype));

TileDB-R/src/libtiledb.cpp

Lines 3718 to 3719 in 3760c68

DatetimeVector dv =
int64_to_datetimes(v, _string_to_tiledb_datatype(dtype));

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions