From 60256a052e13dc743c467caf440e32301c37fe6f Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 18 Jun 2024 12:41:52 -0600 Subject: [PATCH 01/51] Table Materializer Methods --- src/DimensionalData.jl | 1 + src/array/array.jl | 7 ++++ src/stack/stack.jl | 11 +++++++ src/table_ops.jl | 72 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+) create mode 100644 src/table_ops.jl diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 9eb0c0ae2..0b176b383 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -84,6 +84,7 @@ const DD = DimensionalData # Common include("interface.jl") include("name.jl") +include("table_ops.jl") # Arrays include("array/array.jl") diff --git a/src/array/array.jl b/src/array/array.jl index d82f4492f..b6dd0320d 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -411,6 +411,13 @@ function DimArray(A::AbstractBasicDimArray; newdata = collect(data) DimArray(newdata, format(dims, newdata); refdims, name, metadata) end +# Write a single column from a table with one or more coordinate columns to a DimArray +function DimArray(table, dims::Tuple, col::Symbol; missingval=missing) + perm = _sort_coords(table, dims) + data = Tables.getcolumn(table, col) + dst = _write_vals(data, dims, perm, missingval) + return DimArray(reshape(dst, size(dims)), dims, name=col) +end """ DimArray(f::Function, dim::Dimension; [name]) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index afed49cdc..004ca9f23 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -423,5 +423,16 @@ function DimStack(data::NamedTuple, dims::Tuple; all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) end +# Write each column from a table with one or more coordinate columns to a layer in a DimStack +function DimStack(table, dims::Tuple; missingval=missing) + arrays = Any[] + perm = _sort_coords(table, dims) + 
data_cols = _data_cols(table, dims) + for (name, data) in pairs(data_cols) + dst = _write_vals(data, dims, perm, missingval) + push!(arrays, reshape(dst, size(dims))) + end + return DimStack(NamedTuple{keys(data_cols)}(arrays), dims) +end layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) diff --git a/src/table_ops.jl b/src/table_ops.jl new file mode 100644 index 000000000..031ceeae4 --- /dev/null +++ b/src/table_ops.jl @@ -0,0 +1,72 @@ +function _write_vals(data, dims::Tuple, perm, missingval) + # Allocate Destination Array + dst_size = reduce(*, length.(dims)) + dst = Vector{eltype(data)}(undef, dst_size) + dst[perm] .= data + + # Handle Missing Rows + _missingval = _cast_missing(data, missingval) + missing_rows = ones(Bool, dst_size) + missing_rows[perm] .= false + return ifelse.(missing_rows, _missingval, dst) +end + +# Find the order of the table's rows according to the coordinate values +_sort_coords(table, dims::Tuple) = _sort_coords(_dim_cols(table, dims), dims) +function _sort_coords(coords::NamedTuple, dims::Tuple) + ords = _coords_to_ords(coords, dims) + indices = _ords_to_indices(ords, dims) + return indices +end + +# Extract coordinate columns from table +function _dim_cols(table, dims::Tuple) + dim_cols = name.(dims) + return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) +end + +# Extract data columns from table +function _data_cols(table, dims::Tuple) + dim_cols = name.(dims) + data_cols = filter(x -> !(x in dim_cols), Tables.columnnames(table)) + return NamedTuple{Tuple(data_cols)}(Tables.getcolumn(table, col) for col in data_cols) +end + +# Determine the ordinality of a set of numerical coordinates +function _coords_to_ords(coords::AbstractVector, dim::AbstractVector{<:Real}) + stride = (last(dim) - first(dim)) / (length(dim) - 1) + return round.(UInt32, ((coords .- first(dim)) ./ stride) .+ 1) +end + +# Determine the ordinality of a set of categorical coordinates +function 
_coords_to_ords(coords::AbstractVector, dim::AbstractVector) + d = Dict{eltype(dim),UInt32}() + for (i, x) in enumerate(dim) + d[x] = i + end + return map(x -> d[x], coords) +end + +# Preprocessing methods for _coords_to_ords +_coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, collect(dim)) +_coords_to_ords(coords::Tuple, dims::Tuple) = Tuple(_coords_to_ords(c, d) for (c, d) in zip(coords, dims)) +_coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(Tuple(coords[d] for d in name.(dims)), dims) + +# Determine the index from a tuple of coordinate orders +function _ords_to_indices(ords, dims) + stride = 1 + indices = ones(Int, length(ords[1])) + for (ord, dim) in zip(ords, dims) + indices .+= (ord .- 1) .* stride + stride *= length(dim) + end + return indices +end + +function _cast_missing(::AbstractArray{T}, missingval) where {T} + try + return convert(T, missingval) + catch e + return missingval + end +end \ No newline at end of file From eab2fa09afa78a0f825fd3b58c55d90fb7907108 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 18 Jun 2024 13:17:49 -0600 Subject: [PATCH 02/51] Made col Optional for DimArray --- src/array/array.jl | 3 ++- src/table_ops.jl | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index 37851b4f2..ff8019055 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -412,8 +412,9 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims::Tuple, col::Symbol; missingval=missing) +function DimArray(table, dims; col=nothing, missingval=missing) perm = _sort_coords(table, dims) + col = isnothing(col) ? 
_data_col_names(table, dims) |> first : col data = Tables.getcolumn(table, col) dst = _write_vals(data, dims, perm, missingval) return DimArray(reshape(dst, size(dims)), dims, name=col) diff --git a/src/table_ops.jl b/src/table_ops.jl index 031ceeae4..1299b7bb2 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -27,11 +27,16 @@ end # Extract data columns from table function _data_cols(table, dims::Tuple) - dim_cols = name.(dims) - data_cols = filter(x -> !(x in dim_cols), Tables.columnnames(table)) + data_cols = _data_col_names(table, dims) return NamedTuple{Tuple(data_cols)}(Tables.getcolumn(table, col) for col in data_cols) end +# Get names of data columns from table +function _data_col_names(table, dims::Tuple) + dim_cols = name.(dims) + return filter(x -> !(x in dim_cols), Tables.columnnames(table)) +end + # Determine the ordinality of a set of numerical coordinates function _coords_to_ords(coords::AbstractVector, dim::AbstractVector{<:Real}) stride = (last(dim) - first(dim)) / (length(dim) - 1) From d4892df56cefd61a32f05964ee5b257d9b644e34 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 19 Jun 2024 23:30:39 -0600 Subject: [PATCH 03/51] Apply suggestions from code review Co-authored-by: Rafael Schouten --- src/table_ops.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 1299b7bb2..d6aaffb90 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,6 +1,6 @@ function _write_vals(data, dims::Tuple, perm, missingval) # Allocate Destination Array - dst_size = reduce(*, length.(dims)) + dst_size = prod(map(length, dims)) dst = Vector{eltype(data)}(undef, dst_size) dst[perm] .= data @@ -21,7 +21,7 @@ end # Extract coordinate columns from table function _dim_cols(table, dims::Tuple) - dim_cols = name.(dims) + dim_cols = map(name, dims) return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) end @@ -33,13 +33,13 @@ 
end # Get names of data columns from table function _data_col_names(table, dims::Tuple) - dim_cols = name.(dims) + dim_cols = map(name, dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end # Determine the ordinality of a set of numerical coordinates function _coords_to_ords(coords::AbstractVector, dim::AbstractVector{<:Real}) - stride = (last(dim) - first(dim)) / (length(dim) - 1) + step = (last(dim) - first(dim)) / (length(dim) - 1) return round.(UInt32, ((coords .- first(dim)) ./ stride) .+ 1) end @@ -68,6 +68,7 @@ function _ords_to_indices(ords, dims) return indices end +_cast_missing(::AbstractArray, missingval::Missing) = missing function _cast_missing(::AbstractArray{T}, missingval) where {T} try return convert(T, missingval) From ea6751adad6a33182854c0bc0c64e5139379939c Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Thu, 20 Jun 2024 00:53:05 -0600 Subject: [PATCH 04/51] Handle coordinates with different loci --- src/table_ops.jl | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 1299b7bb2..0417805a0 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -37,23 +37,46 @@ function _data_col_names(table, dims::Tuple) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end -# Determine the ordinality of a set of numerical coordinates -function _coords_to_ords(coords::AbstractVector, dim::AbstractVector{<:Real}) - stride = (last(dim) - first(dim)) / (length(dim) - 1) - return round.(UInt32, ((coords .- first(dim)) ./ stride) .+ 1) +# Determine the ordinality of a set of regularly spaced numerical coordinates with a starting locus +function _coords_to_ords( + coords::AbstractVector, + dim::Dimension, + ::Type{<:Real}, + ::DimensionalData.Start, + ::DimensionalData.Regular) + step = (last(dim) - first(dim)) / (length(dim) - 1) + return floor.(Int, ((coords .- first(dim)) ./ step) .+ 1) end -# Determine the ordinality of a 
set of categorical coordinates -function _coords_to_ords(coords::AbstractVector, dim::AbstractVector) - d = Dict{eltype(dim),UInt32}() - for (i, x) in enumerate(dim) - d[x] = i - end - return map(x -> d[x], coords) +# Determine the ordinality of a set of regularly spaced numerical coordinates with a central locus +function _coords_to_ords( + coords::AbstractVector, + dim::Dimension, + ::Type{<:Real}, + ::DimensionalData.Center, + ::DimensionalData.Regular) + step = (last(dim) - first(dim)) / (length(dim) - 1) + return round.(Int, ((coords .- first(dim)) ./ step) .+ 1) +end + +# Determine the ordinality of a set of regularly spaced numerical coordinates with an end locus +function _coords_to_ords( + coords::AbstractVector, + dim::Dimension, + ::Type{<:Real}, + ::DimensionalData.End, + ::DimensionalData.Regular) + step = (last(dim) - first(dim)) / (length(dim) - 1) + return ceil.(Int, ((coords .- first(dim)) ./ step) .+ 1) +end + +# Determine the ordinality of a set of categorical or irregular coordinates +function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Any, ::Any, ::Any) + return map(c -> DimensionalData.selectindices(dim, At(c)), coords) end # Preprocessing methods for _coords_to_ords -_coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, collect(dim)) +_coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, dim, eltype(dim), locus(dim), span(dim)) _coords_to_ords(coords::Tuple, dims::Tuple) = Tuple(_coords_to_ords(c, d) for (c, d) in zip(coords, dims)) _coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(Tuple(coords[d] for d in name.(dims)), dims) From 6a9d26e9f4d484a8ded27933df9e61e695986087 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Thu, 20 Jun 2024 01:56:19 -0600 Subject: [PATCH 05/51] replaced At() with Contains() in _coords_to_ords --- src/table_ops.jl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/table_ops.jl 
b/src/table_ops.jl index 175371066..d7fae5318 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -21,10 +21,14 @@ end # Extract coordinate columns from table function _dim_cols(table, dims::Tuple) - dim_cols = map(name, dims) + dim_cols = _dim_col_names(dims) return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) end +function _dim_col_names(dims) + return map(name, dims) +end + # Extract data columns from table function _data_cols(table, dims::Tuple) data_cols = _data_col_names(table, dims) @@ -33,7 +37,7 @@ end # Get names of data columns from table function _data_col_names(table, dims::Tuple) - dim_cols = map(name, dims) + dim_cols = _dim_col_names(dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end @@ -72,13 +76,13 @@ end # Determine the ordinality of a set of categorical or irregular coordinates function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Any, ::Any, ::Any) - return map(c -> DimensionalData.selectindices(dim, At(c)), coords) + return map(c -> DimensionalData.selectindices(dim, Contains(c)), coords) end # Preprocessing methods for _coords_to_ords _coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, dim, eltype(dim), locus(dim), span(dim)) _coords_to_ords(coords::Tuple, dims::Tuple) = Tuple(_coords_to_ords(c, d) for (c, d) in zip(coords, dims)) -_coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(Tuple(coords[d] for d in name.(dims)), dims) +_coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(map(x -> coords[x], _dim_col_names(dims)), dims) # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) @@ -98,4 +102,4 @@ function _cast_missing(::AbstractArray{T}, missingval) where {T} catch e return missingval end -end \ No newline at end of file +end From 9164c228a6076b3f8a5d60cdab17f45e136235f9 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 25 Jun 2024 00:44:52 -0600 Subject: [PATCH 06/51] 
Added optional selectors and public methods for table materializer --- src/table_ops.jl | 98 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index d7fae5318..0ecaf8c1d 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,34 +1,89 @@ -function _write_vals(data, dims::Tuple, perm, missingval) +""" + restore_array(data, indices, dims; missingval=missing) + +Restore a dimensional array from a set of values and their corresponding indices. + +# Arguments +- `data`: An `AbstractVector` of values to write to the destination array. +- `indices`: The flat index of each value in `data`. +- `dims`: A `Tuple` of `Dimension` for the corresponding destination array. +- `missingval`: The value to store for missing indices. + +# Example +```julia +julia> d = DimArray(rand(256, 256), (X, Y)); + +julia> t = DimTable(d); + +julia> indices = index_by_coords(t, dims(d)); + +julia> restored = restore_array(Tables.getcolumn(t, :value), indices, dims(d)); + +julia> all(restored .== d) +true +``` +""" +function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple; missingval=missing) # Allocate Destination Array dst_size = prod(map(length, dims)) dst = Vector{eltype(data)}(undef, dst_size) - dst[perm] .= data + dst[indices] .= data # Handle Missing Rows _missingval = _cast_missing(data, missingval) missing_rows = ones(Bool, dst_size) - missing_rows[perm] .= false - return ifelse.(missing_rows, _missingval, dst) + missing_rows[indices] .= false + data = ifelse.(missing_rows, _missingval, dst) + + # Reshape Array + return reshape(data, size(dims)) +end + +""" + index_by_coords(table, dims; selector=Contains) + +Return the flat index of each row in `table` based on its associated coordinates. +Dimension columns are determined from the name of each dimension in `dims`. +It is assumed that the source/destination array has the same dimension order as `dims`. 
+ +# Arguments +- `table`: A table representation of a dimensional array. +- `dims`: A `Tuple` of `Dimension` corresponding to the source/destination array. +- `selector`: The selector type to use for non-numerical/irregular coordinates. + +# Example +```julia +julia> d = DimArray(rand(256, 256), (X, Y)); + +julia> t = DimTable(d); + +julia> index_by_coords(t, dims(d)) +65536-element Vector{Int64}: + 1 + 2 + ⋮ + 65535 + 65536 +``` +""" +function index_by_coords(table, dims::Tuple; selector=DimensionalData.Contains) + return _sort_coords(table, dims, selector) end # Find the order of the table's rows according to the coordinate values -_sort_coords(table, dims::Tuple) = _sort_coords(_dim_cols(table, dims), dims) -function _sort_coords(coords::NamedTuple, dims::Tuple) - ords = _coords_to_ords(coords, dims) +_sort_coords(table, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = _sort_coords(_dim_cols(table, dims), dims, T) +function _sort_coords(coords::NamedTuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} + ords = _coords_to_ords(coords, dims, T) indices = _ords_to_indices(ords, dims) return indices end # Extract coordinate columns from table function _dim_cols(table, dims::Tuple) - dim_cols = _dim_col_names(dims) + dim_cols = name(dims) return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) end -function _dim_col_names(dims) - return map(name, dims) -end - # Extract data columns from table function _data_cols(table, dims::Tuple) data_cols = _data_col_names(table, dims) @@ -37,7 +92,7 @@ end # Get names of data columns from table function _data_col_names(table, dims::Tuple) - dim_cols = _dim_col_names(dims) + dim_cols = name(dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end @@ -45,6 +100,7 @@ end function _coords_to_ords( coords::AbstractVector, dim::Dimension, + ::Type{<:DimensionalData.Selector}, ::Type{<:Real}, ::DimensionalData.Start, ::DimensionalData.Regular) @@ -56,6 +112,7 @@ end 
function _coords_to_ords( coords::AbstractVector, dim::Dimension, + ::Type{<:DimensionalData.Selector}, ::Type{<:Real}, ::DimensionalData.Center, ::DimensionalData.Regular) @@ -67,6 +124,7 @@ end function _coords_to_ords( coords::AbstractVector, dim::Dimension, + ::Type{<:DimensionalData.Selector}, ::Type{<:Real}, ::DimensionalData.End, ::DimensionalData.Regular) @@ -75,14 +133,14 @@ function _coords_to_ords( end # Determine the ordinality of a set of categorical or irregular coordinates -function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Any, ::Any, ::Any) - return map(c -> DimensionalData.selectindices(dim, Contains(c)), coords) +function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Type{T}, ::Any, ::Any, ::Any) where {T<:DimensionalData.Selector} + return map(c -> DimensionalData.selectindices(dim, T(c)), coords) end -# Preprocessing methods for _coords_to_ords -_coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, dim, eltype(dim), locus(dim), span(dim)) -_coords_to_ords(coords::Tuple, dims::Tuple) = Tuple(_coords_to_ords(c, d) for (c, d) in zip(coords, dims)) -_coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(map(x -> coords[x], _dim_col_names(dims)), dims) +# Determine the ordinality of a set of coordinates +_coords_to_ords(coords::AbstractVector, dim::Dimension, ::Type{T}) where {T <: DimensionalData.Selector} = _coords_to_ords(coords, dim, T, eltype(dim), locus(dim), span(dim)) +_coords_to_ords(coords::Tuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = Tuple(_coords_to_ords(c, d, T) for (c, d) in zip(coords, dims)) +_coords_to_ords(coords::NamedTuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = _coords_to_ords(map(x -> coords[x], name(dims)), dims, T) # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) @@ -102,4 +160,4 @@ function _cast_missing(::AbstractArray{T}, missingval) where {T} catch e return 
missingval end -end +end \ No newline at end of file From 2ebec1c69d8f9c4daea3b96ce715102b7facbfc2 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 25 Jun 2024 00:54:32 -0600 Subject: [PATCH 07/51] Updated table constructors for DimArray and DimStack --- src/array/array.jl | 9 ++++----- src/stack/stack.jl | 10 +++------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index ff8019055..dc61cf9c1 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -412,12 +412,11 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; col=nothing, missingval=missing) - perm = _sort_coords(table, dims) +function DimArray(table, dims; col=nothing, missingval=missing, selector=DimensionalData.Contains) + indices = index_by_coords(table, dims; selector=selector) col = isnothing(col) ? 
_data_col_names(table, dims) |> first : col - data = Tables.getcolumn(table, col) - dst = _write_vals(data, dims, perm, missingval) - return DimArray(reshape(dst, size(dims)), dims, name=col) + data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missingval) + return DimArray(data, dims, name=col) end """ DimArray(f::Function, dim::Dimension; [name]) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 004ca9f23..165735a16 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -424,14 +424,10 @@ function DimStack(data::NamedTuple, dims::Tuple; DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) end # Write each column from a table with one or more coordinate columns to a layer in a DimStack -function DimStack(table, dims::Tuple; missingval=missing) - arrays = Any[] - perm = _sort_coords(table, dims) +function DimStack(table, dims::Tuple; missingval=missing, selector=DimensionalData.Contains) data_cols = _data_cols(table, dims) - for (name, data) in pairs(data_cols) - dst = _write_vals(data, dims, perm, missingval) - push!(arrays, reshape(dst, size(dims))) - end + indices = index_by_coords(table, dims; selector=selector) + arrays = [restore_array(d, indices, dims; missingval=missingval) for d in values(data_cols)] return DimStack(NamedTuple{keys(data_cols)}(arrays), dims) end From 8e791bfe43de768678859966011a3cd6a0315d4b Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 5 Jul 2024 16:53:52 -0600 Subject: [PATCH 08/51] Updated DimArray and DimStack docs to include table materializer methods --- src/array/array.jl | 13 +++++++------ src/stack/stack.jl | 7 ++++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index dc61cf9c1..c42870f28 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -334,7 +334,7 @@ end DimArray <: AbstractDimArray DimArray(data, dims, refdims, name, metadata) - DimArray(data, dims::Tuple; refdims=(), 
name=NoName(), metadata=NoMetadata()) + DimArray(data, dims::Tuple; refdims=(), name=NoName(), metadata=NoMetadata(), selector=Contains) The main concrete subtype of [`AbstractDimArray`](@ref). @@ -344,12 +344,13 @@ moves dimensions to reference dimension `refdims` after reducing operations ## Arguments -- `data`: An `AbstractArray`. +- `data`: An `AbstractArray` or a table with coordinate columns corresponding to `dims`. - `dims`: A `Tuple` of `Dimension` - `name`: A string name for the array. Shows in plots and tables. - `refdims`: refence dimensions. Usually set programmatically to track past slices and reductions of dimension for labelling and reconstruction. - `metadata`: `Dict` or `Metadata` object, or `NoMetadata()` +- `selector`: The coordinate selector type to use when materializing from a table. Indexing can be done with all regular indices, or with [`Dimension`](@ref)s and/or [`Selector`](@ref)s. @@ -412,11 +413,11 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; col=nothing, missingval=missing, selector=DimensionalData.Contains) +function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains, kw...) indices = index_by_coords(table, dims; selector=selector) - col = isnothing(col) ? _data_col_names(table, dims) |> first : col - data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missingval) - return DimArray(data, dims, name=col) + col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) + data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) + return DimArray(data, dims, name=col; kw...) 
end """ DimArray(f::Function, dim::Dimension; [name]) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 165735a16..acf6506e0 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -278,6 +278,7 @@ end """ DimStack <: AbstractDimStack + DimStack(table, dims; kw...) DimStack(data::AbstractDimArray...; kw...) DimStack(data::Tuple{Vararg{AbstractDimArray}}; kw...) DimStack(data::NamedTuple{Keys,Vararg{AbstractDimArray}}; kw...) @@ -424,11 +425,11 @@ function DimStack(data::NamedTuple, dims::Tuple; DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) end # Write each column from a table with one or more coordinate columns to a layer in a DimStack -function DimStack(table, dims::Tuple; missingval=missing, selector=DimensionalData.Contains) +function DimStack(table, dims::Tuple; selector=DimensionalData.Contains, kw...) data_cols = _data_cols(table, dims) indices = index_by_coords(table, dims; selector=selector) - arrays = [restore_array(d, indices, dims; missingval=missingval) for d in values(data_cols)] - return DimStack(NamedTuple{keys(data_cols)}(arrays), dims) + arrays = [restore_array(d, indices, dims; missingval=missing) for d in values(data_cols)] + return DimStack(NamedTuple{keys(data_cols)}(arrays), dims; kw...) 
end layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) From 4cd5f9d7c86dcfefcc26378da8c8dfd2afc4b98d Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 5 Jul 2024 16:54:15 -0600 Subject: [PATCH 09/51] Table materializer test cases --- test/tables.jl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/tables.jl b/test/tables.jl index b5bd416ea..728cd00cb 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -154,3 +154,48 @@ end @test Tables.columnnames(t3) == (:dimensions, :layer1, :layer2, :layer3) @test Tables.columnnames(t4) == (:band, :geometry, :value) end + +@testset "Materialize from table" begin + a = DimArray(rand(UInt8, 100, 100), (X(100:-1:1), Y(-250:5:249))) + b = DimArray(rand(Float32, 100, 100), (X(100:-1:1), Y(-250:5:249))) + c = DimArray(rand(Float64, 100, 100), (X(100:-1:1), Y(-250:5:249))) + ds = DimStack((a=a, b=b, c=c)) + t = DataFrame(ds) + t1 = Random.shuffle(t) + t2 = t[101:end,:] + + # Restore DimArray from shuffled table + @test all(DimArray(t1, dims(ds)) .== a) + @test all(DimArray(t1, dims(ds), name="a") .== a) + @test all(DimArray(t1, dims(ds), name="b") .== b) + @test all(DimArray(t1, dims(ds), name="c") .== c) + + # Restore DimArray from table with missing rows + @test all(DimArray(t2, dims(ds), name="a")[Y(2:100)] .== a[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="b")[Y(2:100)] .== b[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="c")[Y(2:100)] .== c[Y(2:100)]) + @test DimArray(t2, dims(ds), name="a")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="b")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="c")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="a")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="b")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="c")[Y(2:100)] .|> ismissing .|> (!) 
|> all + + # Restore DimStack from shuffled table + restored_stack = DimStack(t1, dims(ds)) + @test all(restored_stack.a .== ds.a) + @test all(restored_stack.b .== ds.b) + @test all(restored_stack.c .== ds.c) + + # Restore DimStack from table with missing rows + restored_stack = DimStack(t2, dims(ds)) + @test all(restored_stack.a[Y(2:100)] .== ds.a[Y(2:100)]) + @test all(restored_stack.b[Y(2:100)] .== ds.b[Y(2:100)]) + @test all(restored_stack.c[Y(2:100)] .== ds.c[Y(2:100)]) + @test restored_stack.a[Y(1)] .|> ismissing |> all + @test restored_stack.b[Y(1)] .|> ismissing |> all + @test restored_stack.c[Y(1)] .|> ismissing |> all + @test restored_stack.a[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.b[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.c[Y(2:100)] .|> ismissing .|> (!) |> all +end \ No newline at end of file From 0c1991a42f0edf3e198d3e25cc85b53c5abc0a85 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 5 Jul 2024 17:03:20 -0600 Subject: [PATCH 10/51] export table materializer methods --- src/DimensionalData.jl | 3 +++ src/array/array.jl | 2 +- src/stack/stack.jl | 2 +- src/table_ops.jl | 8 ++++---- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 0b176b383..bccc406c1 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -77,6 +77,9 @@ export dimnum, hasdim, hasselection, otherdims # utils export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims, unmergedims +# table utils +export restore_array, coords_to_index + export groupby, seasons, months, hours, intervals, ranges const DD = DimensionalData diff --git a/src/array/array.jl b/src/array/array.jl index c42870f28..9f19ef5e7 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -414,7 +414,7 @@ function DimArray(A::AbstractBasicDimArray; end # Write a single column from a table with one or more coordinate columns to a DimArray function DimArray(table, dims; 
name=NoName(), selector=DimensionalData.Contains, kw...) - indices = index_by_coords(table, dims; selector=selector) + indices = coords_to_index(table, dims; selector=selector) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) return DimArray(data, dims, name=col; kw...) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index acf6506e0..98f4d78c7 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -427,7 +427,7 @@ end # Write each column from a table with one or more coordinate columns to a layer in a DimStack function DimStack(table, dims::Tuple; selector=DimensionalData.Contains, kw...) data_cols = _data_cols(table, dims) - indices = index_by_coords(table, dims; selector=selector) + indices = coords_to_index(table, dims; selector=selector) arrays = [restore_array(d, indices, dims; missingval=missing) for d in values(data_cols)] return DimStack(NamedTuple{keys(data_cols)}(arrays), dims; kw...) end diff --git a/src/table_ops.jl b/src/table_ops.jl index 0ecaf8c1d..f93ffc388 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -15,7 +15,7 @@ julia> d = DimArray(rand(256, 256), (X, Y)); julia> t = DimTable(d); -julia> indices = index_by_coords(t, dims(d)); +julia> indices = coords_to_index(t, dims(d)); julia> restored = restore_array(Tables.getcolumn(t, :value), indices, dims(d)); @@ -40,7 +40,7 @@ function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, end """ - index_by_coords(table, dims; selector=Contains) + coords_to_index(table, dims; selector=Contains) Return the flat index of each row in `table` based on its associated coordinates. Dimension columns are determined from the name of each dimension in `dims`. 
@@ -57,7 +57,7 @@ julia> d = DimArray(rand(256, 256), (X, Y)); julia> t = DimTable(d); -julia> index_by_coords(t, dims(d)) +julia> coords_to_index(t, dims(d)) 65536-element Vector{Int64}: 1 2 @@ -66,7 +66,7 @@ julia> index_by_coords(t, dims(d)) 65536 ``` """ -function index_by_coords(table, dims::Tuple; selector=DimensionalData.Contains) +function coords_to_index(table, dims::Tuple; selector=DimensionalData.Contains) return _sort_coords(table, dims, selector) end From 4534de580576023b01f27fd2d3b1c4f65672ff02 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 5 Jul 2024 17:20:28 -0600 Subject: [PATCH 11/51] Added Random to tables.jl test cases --- test/tables.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tables.jl b/test/tables.jl index 728cd00cb..23ea9eed5 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -1,4 +1,4 @@ -using DimensionalData, IteratorInterfaceExtensions, TableTraits, Tables, Test, DataFrames +using DimensionalData, IteratorInterfaceExtensions, TableTraits, Tables, Test, DataFrames, Random using DimensionalData.Lookups, DimensionalData.Dimensions using DimensionalData: DimTable, DimExtensionArray From ed395cac2fca04d1000a3778b38fa96a7d0a696c Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 7 Aug 2024 23:59:45 -0600 Subject: [PATCH 12/51] Update src/array/array.jl Co-authored-by: Rafael Schouten --- src/array/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/array.jl b/src/array/array.jl index 1946e1292..c1e4e6559 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -432,7 +432,7 @@ function DimArray(A::AbstractBasicDimArray; end # Write a single column from a table with one or more coordinate columns to a DimArray function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains, kw...) 
- indices = coords_to_index(table, dims; selector=selector) + indices = coords_to_index(table, dims; selector) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) return DimArray(data, dims, name=col; kw...) From 00336afd3dcc21b4b60170b041eaf0fe9f9ae089 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:00:00 -0600 Subject: [PATCH 13/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index f93ffc388..d7b8622e7 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -66,7 +66,7 @@ julia> coords_to_index(t, dims(d)) 65536 ``` """ -function coords_to_index(table, dims::Tuple; selector=DimensionalData.Contains) +function coords_to_index(table, dims::Tuple; selector=DimensionalData.Contains()) return _sort_coords(table, dims, selector) end From 532f887b4c1e5a8e38479272ca958e410ce4970c Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Thu, 8 Aug 2024 00:00:38 -0600 Subject: [PATCH 14/51] Removed exports --- src/DimensionalData.jl | 3 --- src/array/array.jl | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index bccc406c1..0b176b383 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -77,9 +77,6 @@ export dimnum, hasdim, hasselection, otherdims # utils export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims, unmergedims -# table utils -export restore_array, coords_to_index - export groupby, seasons, months, hours, intervals, ranges const DD = DimensionalData diff --git a/src/array/array.jl b/src/array/array.jl index 1946e1292..d7e3efcd3 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -334,7 +334,7 @@ end DimArray <: AbstractDimArray 
DimArray(data, dims, refdims, name, metadata) - DimArray(data, dims::Tuple; refdims=(), name=NoName(), metadata=NoMetadata(), selector=Contains) + DimArray(data, dims::Tuple; refdims=(), name=NoName(), metadata=NoMetadata(), selector=Contains()) The main concrete subtype of [`AbstractDimArray`](@ref). From 06a2c912faf69345688d8d3004b6e5ad93f1efd6 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:05:29 -0600 Subject: [PATCH 15/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index d7b8622e7..e12083fd0 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -107,7 +107,6 @@ function _coords_to_ords( step = (last(dim) - first(dim)) / (length(dim) - 1) return floor.(Int, ((coords .- first(dim)) ./ step) .+ 1) end - # Determine the ordinality of a set of regularly spaced numerical coordinates with a central locus function _coords_to_ords( coords::AbstractVector, From 3bacf338eac5ff7d334e22c131ff786bc72b0b3c Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:18:29 -0600 Subject: [PATCH 16/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index e12083fd0..12f79a364 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -132,7 +132,7 @@ function _coords_to_ords( end # Determine the ordinality of a set of categorical or irregular coordinates -function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Type{T}, ::Any, ::Any, ::Any) where {T<:DimensionalData.Selector} +function _coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DimensionalData.Selector, ::Any, ::Any, ::Any) return map(c -> DimensionalData.selectindices(dim, T(c)), coords) end From 
4ced6f7a445577dce25af1f6eeee84d8d72a5a83 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:18:46 -0600 Subject: [PATCH 17/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 12f79a364..03f6d05fa 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -111,7 +111,7 @@ end function _coords_to_ords( coords::AbstractVector, dim::Dimension, - ::Type{<:DimensionalData.Selector}, + ::DimensionalData.Selector, ::Type{<:Real}, ::DimensionalData.Center, ::DimensionalData.Regular) From c846dfdfe0d4ad00bf30ba46f24934851458a760 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:18:54 -0600 Subject: [PATCH 18/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 03f6d05fa..519ea20a1 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -100,7 +100,7 @@ end function _coords_to_ords( coords::AbstractVector, dim::Dimension, - ::Type{<:DimensionalData.Selector}, + ::DimensionalData.Selector, ::Type{<:Real}, ::DimensionalData.Start, ::DimensionalData.Regular) From fe2c871514be29e3c1e9d2a8024c219671a90311 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:19:05 -0600 Subject: [PATCH 19/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 519ea20a1..b0d45e6f9 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -133,7 +133,7 @@ end # Determine the ordinality of a set of categorical or irregular coordinates function _coords_to_ords(coords::AbstractVector, dim::Dimension, 
sel::DimensionalData.Selector, ::Any, ::Any, ::Any) - return map(c -> DimensionalData.selectindices(dim, T(c)), coords) + return map(c -> DimensionalData.selectindices(dim, rebuild(sel, c)), coords) end # Determine the ordinality of a set of coordinates From 61f82204739d71845dec865d899e3439aba40dc7 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Thu, 8 Aug 2024 00:19:36 -0600 Subject: [PATCH 20/51] Replaced selector type with instance. --- src/array/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/array.jl b/src/array/array.jl index 690d901f6..d488a6b32 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -431,7 +431,7 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains, kw...) +function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains(), kw...) indices = coords_to_index(table, dims; selector) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) From dbe7b991420c199ebdb87fa1fc434443853d95ec Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sun, 11 Aug 2024 18:13:17 -0600 Subject: [PATCH 21/51] Table materializer can now infer dimensions from the coordinates. 
--- src/array/array.jl | 6 +- src/stack/stack.jl | 4 +- src/table_ops.jl | 249 +++++++++++++++++++++++++++++++++++---------- 3 files changed, 202 insertions(+), 57 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index d488a6b32..0b222c6a0 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -431,12 +431,12 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains(), kw...) - indices = coords_to_index(table, dims; selector) +function DimArray(table, dims; name=NoName(), selector=DimensionalData.Near(), precision=6, kw...) + data = restore_array(table, dims; selector=selector, missingval=missing, name=name, precision=precision) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) - data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) return DimArray(data, dims, name=col; kw...) end +DimArray(table; kw...) = DimArray(table, _guess_dims(table; kw...); kw...) """ DimArray(f::Function, dim::Dimension; [name]) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 5b19ae29a..80b46388e 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -422,9 +422,9 @@ function DimStack(data::NamedTuple, dims::Tuple; DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) end # Write each column from a table with one or more coordinate columns to a layer in a DimStack -function DimStack(table, dims::Tuple; selector=DimensionalData.Contains, kw...) +function DimStack(table, dims::Tuple; selector=DimensionalData.Contains(), kw...) 
data_cols = _data_cols(table, dims) - indices = coords_to_index(table, dims; selector=selector) + indices = coords_to_indices(table, dims; selector=selector) arrays = [restore_array(d, indices, dims; missingval=missing) for d in values(data_cols)] return DimStack(NamedTuple{keys(data_cols)}(arrays), dims; kw...) end diff --git a/src/table_ops.jl b/src/table_ops.jl index b0d45e6f9..7008c674d 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,13 +1,22 @@ """ - restore_array(data, indices, dims; missingval=missing) + restore_array(table; kw...) + restore_array(table, dims::Tuple; name=NoName(), missingval=missing, selector=Near(), precision=6) -Restore a dimensional array from a set of values and their corresponding indices. +Restore a dimensional array from its tabular representation. # Arguments -- `data`: An `AbstractVector` of values to write to the destination array. -- `indices`: The flat index of each value in `data`. -- `dims`: A `Tuple` of `Dimension` for the corresponding destination array. -- `missingval`: The value to store for missing indices. +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other tabular data structure. +Rows can be missing or out of order. +- `dims`: The dimensions of the corresponding `DimArray`. The dimensions may be explicitly defined, or they +may be inferred from the data. In the second case, `restore_array` accepts the same arguments as `guess_dims`. + +# Keyword Arguments +- `name`: The name of the column in `table` from which to restore the array. Defaults to the +first non-dimensional column. +- `missingval`: The value to store for missing rows. +- `selector`: The `Selector` to use when matching coordinates in `table` to their corresponding +indices in `dims`. +- `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`). 
# Example ```julia @@ -15,15 +24,29 @@ julia> d = DimArray(rand(256, 256), (X, Y)); julia> t = DimTable(d); -julia> indices = coords_to_index(t, dims(d)); - -julia> restored = restore_array(Tables.getcolumn(t, :value), indices, dims(d)); +julia> restored = restore_array(t); julia> all(restored .== d) true ``` """ -function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple; missingval=missing) +restore_array(table; kw...) = restore_array(table, _dim_col_names(table); kw...) +function restore_array(table, dims::Tuple; name=NoName(), missingval=missing, selector=DimensionalData.Near(), precision=6) + # Get array dimensions + dims = guess_dims(table, dims, precision=precision) + + # Determine row indices based on coordinate values + indices = coords_to_indices(table, dims; selector=selector) + + # Extract the data column correspondong to `name` + col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) + data = _get_column(table, col) + + # Restore array data + return _restore_array(data, indices, dims, missingval) +end + +function _restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) # Allocate Destination Array dst_size = prod(map(length, dims)) dst = Vector{eltype(data)}(undef, dst_size) @@ -40,7 +63,7 @@ function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, end """ - coords_to_index(table, dims; selector=Contains) + coords_to_indices(table, dims; selector=Near()) Return the flat index of each row in `table` based on its associated coordinates. Dimension columns are determined from the name of each dimension in `dims`. 
@@ -57,7 +80,7 @@ julia> d = DimArray(rand(256, 256), (X, Y)); julia> t = DimTable(d); -julia> coords_to_index(t, dims(d)) +julia> coords_to_indices(t, dims(d)) 65536-element Vector{Int64}: 1 2 @@ -66,24 +89,103 @@ julia> coords_to_index(t, dims(d)) 65536 ``` """ -function coords_to_index(table, dims::Tuple; selector=DimensionalData.Contains()) - return _sort_coords(table, dims, selector) +function coords_to_indices(table, dims::Tuple; selector=DimensionalData.Near()) + return _coords_to_indices(table, dims, selector) end # Find the order of the table's rows according to the coordinate values -_sort_coords(table, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = _sort_coords(_dim_cols(table, dims), dims, T) -function _sort_coords(coords::NamedTuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} - ords = _coords_to_ords(coords, dims, T) +_coords_to_indices(table, dims::Tuple, sel::DimensionalData.Selector) = _coords_to_indices(_dim_cols(table, dims), dims, sel) +function _coords_to_indices(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) + ords = _coords_to_ords(coords, dims, sel) indices = _ords_to_indices(ords, dims) return indices end +""" + guess_dims(table; kw...) + guess_dims(table, dims; precision=6) + +Guesses the dimensions of an array based on the provided tabular representation. + +# Arguments +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other tabular data structure. +The dimensions will be inferred from the corresponding coordinate collumns in the table. +- `dims`: One or more dimensions to be inferred. If no dimensions are specified, then `guess_dims` will default +to any available dimensions in the set `(:X, :Y, :Z, :Ti, :Band)`. Dimensions can be given as either a singular +value or as a `Pair` with both the dimensions and corresponding order. The order will be inferred from the data +when none is given. 
This should work for sorted coordinates, but will not be sufficient when the table's rows are +out of order. + +# Keyword Arguments +- `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`). + +# Returns +A tuple containing the inferred dimensions from the table. + +# Example +```julia +julia> xdims = X(LinRange{Float64}(610000.0, 661180.0, 2560)); + +julia> ydims = Y(LinRange{Float64}(6.84142e6, 6.79024e6, 2560)); + +julia> bdims = Dim{:Band}([:B02, :B03, :B04]); + +julia> d = DimArray(rand(UInt16, 2560, 2560, 3), (xdims, ydims, bdims)); + +julia> t = DataFrame(d); + +julia> t_rand = Random.shuffle(t); + +julia> dims(d) +↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, +→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered + +julia> DD.guess_dims(t) +↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, +→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered + +julia> DD.guess_dims(t, (X, Y, :Band)) +↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, +→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered + +julia> DD.guess_dims(t_rand, (X => DD.ForwardOrdered(), Y => DD.ReverseOrdered(), :Band => DD.ForwardOrdered())) +↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, +→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered +``` +""" +guess_dims(table; kw...) 
= guess_dims(table, filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)); kw...) +guess_dims(table, dims::Tuple; kw...) = map(dim -> guess_dims(table, dim; kw...), dims) +guess_dims(table, dim; precision=6) = _guess_dims(_get_column(table, dim), dim, precision) + +_guess_dims(coords::AbstractVector, dim::DD.Dimension, args...) = dim +_guess_dims(coords::AbstractVector, dim::Type{<:DD.Dimension}, args...) = _guess_dims(coords, DD.name(dim), args...) +_guess_dims(coords::AbstractVector, dim::Pair, args...) = _guess_dims(coords, first(dim), last(dim), args...) +function _guess_dims(coords::AbstractVector, dim::Symbol, precision::Int) + dim_vals = _dim_vals(coords, precision) + order = _guess_dim_order(dim_vals) + span = _guess_dim_span(dim_vals, order, precision) + return _build_dim(dim_vals, dim, order, span) +end +function _guess_dims(coords::AbstractVector, dim::Symbol, order::DD.Order, precision::Int) + dim_vals = _dim_vals(coords, order, precision) + span = _guess_dim_span(dim_vals, order, precision) + return _build_dim(dim_vals, dim, order, span) +end + # Extract coordinate columns from table function _dim_cols(table, dims::Tuple) - dim_cols = name(dims) + dim_cols = DD.name(dims) return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) end +# Extract dimension column names from the given table +_dim_col_names(table) = filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)) +_dim_col_names(table, dims::Tuple) = map(col -> Tables.getcolumn(table, col), DD.name(dims)) + # Extract data columns from table function _data_cols(table, dims::Tuple) data_cols = _data_col_names(table, dims) @@ -92,54 +194,97 @@ end # Get names of data columns from table function _data_col_names(table, dims::Tuple) - dim_cols = name(dims) + dim_cols = DD.name(dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end -# Determine the ordinality of a set of regularly spaced numerical coordinates with a starting locus -function 
_coords_to_ords( - coords::AbstractVector, - dim::Dimension, - ::DimensionalData.Selector, - ::Type{<:Real}, - ::DimensionalData.Start, - ::DimensionalData.Regular) - step = (last(dim) - first(dim)) / (length(dim) - 1) - return floor.(Int, ((coords .- first(dim)) ./ step) .+ 1) -end -# Determine the ordinality of a set of regularly spaced numerical coordinates with a central locus +# Determine the ordinality of a set of coordinates +_coords_to_ords(coords::AbstractVector, dim::DD.Dimension, sel::DD.Selector) = _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim)) +_coords_to_ords(coords::Tuple, dims::Tuple, sel::DimensionalData.Selector) = Tuple(_coords_to_ords(c, d, sel) for (c, d) in zip(coords, dims)) +_coords_to_ords(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) = _coords_to_ords(map(x -> coords[x], DD.name(dims)), dims, sel) + +# Determine the ordinality of a set of regularly spaced numerical coordinates function _coords_to_ords( - coords::AbstractVector, + coords::AbstractVector{<:Real}, dim::Dimension, - ::DimensionalData.Selector, - ::Type{<:Real}, - ::DimensionalData.Center, - ::DimensionalData.Regular) - step = (last(dim) - first(dim)) / (length(dim) - 1) - return round.(Int, ((coords .- first(dim)) ./ step) .+ 1) + ::DimensionalData.Near, + position::DimensionalData.Position, + span::DimensionalData.Regular) + step = DD.step(span) + float_ords = ((coords .- first(dim)) ./ step) .+ 1 + int_ords = _round_ords(float_ords, position) + return clamp!(int_ords, 1, length(dim)) end -# Determine the ordinality of a set of regularly spaced numerical coordinates with an end locus +# Determine the ordinality of a set of categorical or irregular coordinates function _coords_to_ords( coords::AbstractVector, dim::Dimension, - ::Type{<:DimensionalData.Selector}, - ::Type{<:Real}, - ::DimensionalData.End, - ::DimensionalData.Regular) - step = (last(dim) - first(dim)) / (length(dim) - 1) - return ceil.(Int, ((coords .- first(dim)) ./ step) .+ 1) 
+ sel::DimensionalData.Selector, + ::DimensionalData.Position, + ::DimensionalData.Span) + return map(c -> DimensionalData.selectindices(dim, rebuild(sel, c)), coords) end -# Determine the ordinality of a set of categorical or irregular coordinates -function _coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DimensionalData.Selector, ::Any, ::Any, ::Any) - return map(c -> DimensionalData.selectindices(dim, rebuild(sel, c)), coords) +# Round coordinate ordinality to the appropriate integer given the specified locus +_round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.Start) = floor.(Int, ords) +_round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.Center) = round.(Int, ords) +_round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.End) = ceil.(Int, ords) + +# Extract dimension value from the given vector of coordinates +_dim_vals(coords::AbstractVector, precision::Int) = _unique_vals(coords, precision) +_dim_vals(coords::AbstractVector, ::DD.Order, precision::Int) = _unique_vals(coords, precision) +_dim_vals(coords::AbstractVector, ::DD.ForwardOrdered, precision::Int) = sort!(_unique_vals(coords, precision)) +_dim_vals(coords::AbstractVector, ::DD.ReverseOrdered, precision::Int) = sort!(_unique_vals(coords, precision), rev=true) + +# Extract all unique coordinates from the given vector +_unique_vals(coords::AbstractVector, precision::Int) = _round_dim_val.(coords, precision) |> unique + +# Round dimension value within the specified precision +_round_dim_val(x, ::Int) = x +_round_dim_val(x::Real, precision::Int) = round(x, digits=precision) + +# Determine if the given coordinates are forward ordered, reverse ordered, or unordered +function _guess_dim_order(coords::AbstractVector) + if issorted(coords) + return DD.ForwardOrdered() + elseif issorted(coords, rev=true) + return DD.ReverseOrdered() + else + return DD.Unordered() + end end -# Determine the ordinality of a set of coordinates -_coords_to_ords(coords::AbstractVector, 
dim::Dimension, ::Type{T}) where {T <: DimensionalData.Selector} = _coords_to_ords(coords, dim, T, eltype(dim), locus(dim), span(dim)) -_coords_to_ords(coords::Tuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = Tuple(_coords_to_ords(c, d, T) for (c, d) in zip(coords, dims)) -_coords_to_ords(coords::NamedTuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = _coords_to_ords(map(x -> coords[x], name(dims)), dims, T) +# Estimate the span between consecutive coordinates +_guess_dim_span(::AbstractVector, ::DD.Order, ::Int) = DD.Irregular() +_guess_dim_span(::AbstractVector{<:Real}, ::DD.Order, ::Int) = DD.Irregular() +function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision::Int) + steps = round.((@view coords[2:end]) .- (@view coords[1:end-1]), digits=precision) + span = argmin(abs, steps) + return all(isinteger, round.(steps ./ span, digits=precision)) ? DD.Regular(span) : DD.Irregular() +end + +function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span) + return Dim{dim}(DD.Categorical(vals, order=order)) +end +function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Irregular) + return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) +end +function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Regular) + n = round(Int, abs((last(vals) - first(vals)) / span.step) + 1) + dim_vals = LinRange(first(vals), last(vals), n) + return Dim{dim}(DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) +end + +_get_column(table, x::Type{<:DD.Dimension}) = Tables.getcolumn(table, DD.name(x)) +_get_column(table, x::DD.Dimension) = Tables.getcolumn(table, DD.name(x)) +_get_column(table, x::Symbol) = Tables.getcolumn(table, x) +_get_column(table, x::Pair) = _get_column(table, first(x)) + +_dim_name(x::Symbol) = x +_dim_name(x::DD.Dimension) = DD.name(x) +_dim_name(x::Type{<:DD.Dimension}) = DD.name(x) 
+_dim_name(x::Pair) = _dim_name(first(x)) => last(x) # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) From f41098856ab8b41952904dc69730a1f5e3384dad Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Tue, 17 Sep 2024 23:55:45 -0600 Subject: [PATCH 22/51] Update src/stack/stack.jl Co-authored-by: Rafael Schouten --- src/stack/stack.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 80b46388e..6e47f1ccb 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -278,7 +278,7 @@ end """ DimStack <: AbstractDimStack - DimStack(table, dims; kw...) + DimStack(table, [dims]; kw...) DimStack(data::AbstractDimArray...; kw...) DimStack(data::Tuple{Vararg{AbstractDimArray}}; kw...) DimStack(data::NamedTuple{Keys,Vararg{AbstractDimArray}}; kw...) From a17f06900044214b149926e62a0bd3ef273c41a0 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Tue, 17 Sep 2024 23:56:08 -0600 Subject: [PATCH 23/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 7008c674d..e0add6489 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -94,7 +94,8 @@ function coords_to_indices(table, dims::Tuple; selector=DimensionalData.Near()) end # Find the order of the table's rows according to the coordinate values -_coords_to_indices(table, dims::Tuple, sel::DimensionalData.Selector) = _coords_to_indices(_dim_cols(table, dims), dims, sel) +_coords_to_indices(table, dims::Tuple, sel::DimensionalData.Selector) = + _coords_to_indices(_dim_cols(table, dims), dims, sel) function _coords_to_indices(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) ords = _coords_to_ords(coords, dims, sel) indices = _ords_to_indices(ords, dims) From 
9bdded94e085fe54396234f6f15ac1e5a4041f7c Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Tue, 17 Sep 2024 23:56:23 -0600 Subject: [PATCH 24/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index e0add6489..04e7543b6 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -109,7 +109,7 @@ end Guesses the dimensions of an array based on the provided tabular representation. # Arguments -- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other tabular data structure. +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. The dimensions will be inferred from the corresponding coordinate collumns in the table. - `dims`: One or more dimensions to be inferred. If no dimensions are specified, then `guess_dims` will default to any available dimensions in the set `(:X, :Y, :Z, :Ti, :Band)`. 
Dimensions can be given as either a singular From 545108758d23908baa149555aa131c247b664249 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 18 Sep 2024 00:16:02 -0600 Subject: [PATCH 25/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 04e7543b6..791aa06a4 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -268,7 +268,7 @@ end function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span) return Dim{dim}(DD.Categorical(vals, order=order)) end -function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Irregular) +function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) end function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Regular) From faf4d76ba019157c563794523ad82ac38fec3d1b Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 18 Sep 2024 00:17:24 -0600 Subject: [PATCH 26/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 791aa06a4..48120e9b9 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -271,7 +271,7 @@ end function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) end -function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Regular) +function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Regular) n = round(Int, 
abs((last(vals) - first(vals)) / span.step) + 1) dim_vals = LinRange(first(vals), last(vals), n) return Dim{dim}(DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) From 02f60a387d17c53de8ff5460ecc4917760fc8537 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 18 Sep 2024 00:18:11 -0600 Subject: [PATCH 27/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 48120e9b9..bf7ea9969 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -258,7 +258,6 @@ end # Estimate the span between consecutive coordinates _guess_dim_span(::AbstractVector, ::DD.Order, ::Int) = DD.Irregular() -_guess_dim_span(::AbstractVector{<:Real}, ::DD.Order, ::Int) = DD.Irregular() function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision::Int) steps = round.((@view coords[2:end]) .- (@view coords[1:end-1]), digits=precision) span = argmin(abs, steps) From fafd3574ddf186b020247609b9292fcbe1414f61 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Sun, 22 Sep 2024 11:55:51 -0600 Subject: [PATCH 28/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index bf7ea9969..6fd56589e 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -281,10 +281,6 @@ _get_column(table, x::DD.Dimension) = Tables.getcolumn(table, DD.name(x)) _get_column(table, x::Symbol) = Tables.getcolumn(table, x) _get_column(table, x::Pair) = _get_column(table, first(x)) -_dim_name(x::Symbol) = x -_dim_name(x::DD.Dimension) = DD.name(x) -_dim_name(x::Type{<:DD.Dimension}) = DD.name(x) -_dim_name(x::Pair) = _dim_name(first(x)) => last(x) # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) From 
d7f15f5d30286e9a2086419fe2811c48147fc20a Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 25 Sep 2024 16:47:44 -0600 Subject: [PATCH 29/51] Update src/array/array.jl Co-authored-by: Rafael Schouten --- src/array/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/array.jl b/src/array/array.jl index 0b222c6a0..65b4dc011 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -431,7 +431,7 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; name=NoName(), selector=DimensionalData.Near(), precision=6, kw...) +function DimArray(table, dims; name=NoName(), selector=Near(), precision=6, kw...) data = restore_array(table, dims; selector=selector, missingval=missing, name=name, precision=precision) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) return DimArray(data, dims, name=col; kw...) 
From 34a0a697fe14c79b2c93b527cc5dcc897420c7b8 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:15:36 -0600 Subject: [PATCH 30/51] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 6fd56589e..052d46659 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -265,7 +265,7 @@ function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision end function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span) - return Dim{dim}(DD.Categorical(vals, order=order)) + return rebuild(name2dim(dim), DD.Categorical(vals, order=order)) end function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) From d0b9eb721d227e22ac5428c5261bce2ff45b73c2 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Wed, 25 Sep 2024 18:16:12 -0600 Subject: [PATCH 31/51] Added support for guessing the dimension ordering and span for Dates and DateTimes --- src/table_ops.jl | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 6fd56589e..b01a8cb1b 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -200,9 +200,9 @@ function _data_col_names(table, dims::Tuple) end # Determine the ordinality of a set of coordinates -_coords_to_ords(coords::AbstractVector, dim::DD.Dimension, sel::DD.Selector) = _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim)) -_coords_to_ords(coords::Tuple, dims::Tuple, sel::DimensionalData.Selector) = Tuple(_coords_to_ords(c, d, sel) for (c, d) in zip(coords, dims)) -_coords_to_ords(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) = _coords_to_ords(map(x -> coords[x], DD.name(dims)), dims, 
sel) +_coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DD.Selector) = _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim)) +_coords_to_ords(coords::Tuple, dims::Tuple, sel::DD.Selector) = Tuple(_coords_to_ords(c, d, sel) for (c, d) in zip(coords, dims)) +_coords_to_ords(coords::NamedTuple, dims::Tuple, sel::DD.Selector) = _coords_to_ords(map(x -> coords[x], DD.name(dims)), dims, sel) # Determine the ordinality of a set of regularly spaced numerical coordinates function _coords_to_ords( @@ -239,19 +239,20 @@ _dim_vals(coords::AbstractVector, ::DD.ForwardOrdered, precision::Int) = sort!(_ _dim_vals(coords::AbstractVector, ::DD.ReverseOrdered, precision::Int) = sort!(_unique_vals(coords, precision), rev=true) # Extract all unique coordinates from the given vector -_unique_vals(coords::AbstractVector, precision::Int) = _round_dim_val.(coords, precision) |> unique - -# Round dimension value within the specified precision -_round_dim_val(x, ::Int) = x -_round_dim_val(x::Real, precision::Int) = round(x, digits=precision) +_unique_vals(coords::AbstractVector, ::Int) = unique(coords) +_unique_vals(coords::AbstractVector{<:Real}, precision::Int) = round.(coords, digits=precision) |> unique # Determine if the given coordinates are forward ordered, reverse ordered, or unordered function _guess_dim_order(coords::AbstractVector) - if issorted(coords) - return DD.ForwardOrdered() - elseif issorted(coords, rev=true) - return DD.ReverseOrdered() - else + try + if issorted(coords) + return DD.ForwardOrdered() + elseif issorted(coords, rev=true) + return DD.ReverseOrdered() + else + return DD.Unordered() + end + catch return DD.Unordered() end end @@ -263,14 +264,19 @@ function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision span = argmin(abs, steps) return all(isinteger, round.(steps ./ span, digits=precision)) ? 
DD.Regular(span) : DD.Irregular() end +function _guess_dim_span(coords::AbstractVector{<:Dates.AbstractTime}, ::DD.Ordered, precision::Int) + steps = (@view coords[2:end]) .- (@view coords[1:end-1]) + span = argmin(abs, steps) + return all(isinteger, round.(steps ./ span, digits=precision)) ? DD.Regular(span) : DD.Irregular() +end function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span) return Dim{dim}(DD.Categorical(vals, order=order)) end -function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) +function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) end -function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Regular) +function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Regular) n = round(Int, abs((last(vals) - first(vals)) / span.step) + 1) dim_vals = LinRange(first(vals), last(vals), n) return Dim{dim}(DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) From 0ea72a05e3ff1088189cf9aab520e520ce597a75 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 27 Sep 2024 14:19:10 -0600 Subject: [PATCH 32/51] Replaced LinRange with StepRangeLen in _build_dim --- src/table_ops.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index dd995b79f..8f7fcbe14 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -274,12 +274,12 @@ function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Spa return rebuild(name2dim(dim), DD.Categorical(vals, order=order)) end function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) - return Dim{dim}(DD.Sampled(vals, 
order=order, span=span, sampling=DD.Points())) + return rebuild(name2dim(dim), DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) end function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Regular) n = round(Int, abs((last(vals) - first(vals)) / span.step) + 1) - dim_vals = LinRange(first(vals), last(vals), n) - return Dim{dim}(DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) + dim_vals = StepRangeLen(first(vals), span.step, n) + return rebuild(name2dim(dim), DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) end _get_column(table, x::Type{<:DD.Dimension}) = Tables.getcolumn(table, DD.name(x)) From bc629320e37ae80cf5eeadb63890f5d28810abeb Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 15 Oct 2024 00:41:38 -0600 Subject: [PATCH 33/51] Added Tables.istable check to DimArray constructor --- src/array/array.jl | 34 ++++++++++--- src/table_ops.jl | 120 +++++++++++++++++++++------------------------ 2 files changed, 83 insertions(+), 71 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index 65b4dc011..f02449401 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -431,12 +431,34 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; name=NoName(), selector=Near(), precision=6, kw...) - data = restore_array(table, dims; selector=selector, missingval=missing, name=name, precision=precision) - col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) - return DimArray(data, dims, name=col; kw...) +function DimArray(table, dims; name=NoName(), selector=Near(), precision=6, missingval=missing, kw...) 
+ # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) + + # Get array dimensions + dims = guess_dims(table, dims, precision=precision) + + # Determine row indices based on coordinate values + indices = coords_to_indices(table, dims; selector=selector) + + # Extract the data column correspondong to `name` + col = name == NoName() ? data_col_names(table, dims) |> first : Symbol(name) + data = Tables.getcolumn(table, col) + + # Restore array data + array = restore_array(data, indices, dims, missingval) + + # Return DimArray + return DimArray(array, dims, name=col; kw...) +end +# Same as above, but guess dimension names +function DimArray(table; kw...) + # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) + + # Use default dimension + return DimArray(table, guess_dims(table; kw...); kw...) end -DimArray(table; kw...) = DimArray(table, _guess_dims(table; kw...); kw...) """ DimArray(f::Function, dim::Dimension; [name]) @@ -445,7 +467,7 @@ Apply function `f` across the values of the dimension `dim` the given dimension. Optionally provide a name for the result. """ function DimArray(f::Function, dim::Dimension; name=Symbol(nameof(f), "(", name(dim), ")")) - DimArray(f.(val(dim)), (dim,); name) + DimArray(f.(val(dim)), (dim,); name) end const DimVector = DimArray{T,1} where T diff --git a/src/table_ops.jl b/src/table_ops.jl index 8f7fcbe14..73b3d2525 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,52 +1,19 @@ """ - restore_array(table; kw...) - restore_array(table, dims::Tuple; name=NoName(), missingval=missing, selector=Near(), precision=6) + restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) Restore a dimensional array from its tabular representation. 
# Arguments -- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other tabular data structure. -Rows can be missing or out of order. -- `dims`: The dimensions of the corresponding `DimArray`. The dimensions may be explicitly defined, or they -may be inferred from the data. In the second case, `restore_array` accepts the same arguments as `guess_dims`. - -# Keyword Arguments -- `name`: The name of the column in `table` from which to restore the array. Defaults to the -first non-dimensional column. -- `missingval`: The value to store for missing rows. -- `selector`: The `Selector` to use when matching coordinates in `table` to their corresponding -indices in `dims`. -- `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`). - -# Example -```julia -julia> d = DimArray(rand(256, 256), (X, Y)); - -julia> t = DimTable(d); +- `data`: An `AbstractVector` containing the flat data to be written to a `DimArray`. +- `indices`: An `AbstractVector` containing the flat indices corresponding to each element in `data`. +- `dims`: The dimensions of the destination `DimArray`. +- `missingval`: The value to write for missing elements in `data`. -julia> restored = restore_array(t); - -julia> all(restored .== d) -true +# Returns +An `Array` containing the ordered valued in `data` with the size specified by `dims`. ``` """ -restore_array(table; kw...) = restore_array(table, _dim_col_names(table); kw...) -function restore_array(table, dims::Tuple; name=NoName(), missingval=missing, selector=DimensionalData.Near(), precision=6) - # Get array dimensions - dims = guess_dims(table, dims, precision=precision) - - # Determine row indices based on coordinate values - indices = coords_to_indices(table, dims; selector=selector) - - # Extract the data column correspondong to `name` - col = name == NoName() ? 
_data_col_names(table, dims) |> first : Symbol(name) - data = _get_column(table, col) - - # Restore array data - return _restore_array(data, indices, dims, missingval) -end - -function _restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) +function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) # Allocate Destination Array dst_size = prod(map(length, dims)) dst = Vector{eltype(data)}(undef, dst_size) @@ -143,28 +110,63 @@ julia> dims(d) ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered julia> DD.guess_dims(t) -↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, -→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points, +→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered -julia> DD.guess_dims(t, (X, Y, :Band)) -↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, -→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +julia> DD.guess_dims(t, X, Y, :Band) +↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points, +→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered -julia> DD.guess_dims(t_rand, (X => DD.ForwardOrdered(), Y => DD.ReverseOrdered(), :Band => DD.ForwardOrdered())) -↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, -→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +julia> DD.guess_dims(t_rand, X => DD.ForwardOrdered, Y => DD.ReverseOrdered, :Band => DD.ForwardOrdered) +↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered 
Regular Points, +→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered ``` """ -guess_dims(table; kw...) = guess_dims(table, filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)); kw...) -guess_dims(table, dims::Tuple; kw...) = map(dim -> guess_dims(table, dim; kw...), dims) -guess_dims(table, dim; precision=6) = _guess_dims(_get_column(table, dim), dim, precision) +guess_dims(table; kw...) = guess_dims(table, _dim_col_names(table); kw...) +function guess_dims(table, dims::Tuple; precision=6) + map(dim -> _guess_dims(get_column(table, dim), dim, precision), dims) +end + +""" + get_column(table, dim::Type{<:DD.Dimension}) + get_column(table, dim::DD.Dimension) + get_column(table, dim::Symbol) + get_column(table, dim::Pair) + +Retrieve the coordinate data stored in the column specified by `dim`. + +# Arguments +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. +- `dim`: A single dimension to be retrieved, which may be a `Symbol`, a `Dimension`, or a `Dimension => Order` pair. +""" +get_column(table, x::Type{<:DD.Dimension}) = Tables.getcolumn(table, DD.name(x)) +get_column(table, x::DD.Dimension) = Tables.getcolumn(table, DD.name(x)) +get_column(table, x::Symbol) = Tables.getcolumn(table, x) +get_column(table, x::Pair) = get_column(table, first(x)) + +""" + data_col_names(table, dims::Tuple) + +Return the names of all columns that don't matched the dimensions given by `dims`. + +# Arguments +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. +- `dims`: A `Tuple` of one or more `Dimensions`. +""" +function data_col_names(table, dims::Tuple) + dim_cols = DD.name(dims) + return filter(x -> !(x in dim_cols), Tables.columnnames(table)) +end _guess_dims(coords::AbstractVector, dim::DD.Dimension, args...) 
= dim _guess_dims(coords::AbstractVector, dim::Type{<:DD.Dimension}, args...) = _guess_dims(coords, DD.name(dim), args...) _guess_dims(coords::AbstractVector, dim::Pair, args...) = _guess_dims(coords, first(dim), last(dim), args...) +function _guess_dims(coords::AbstractVector, dim::Symbol, ::Type{T}, precision::Int) where {T <: DD.Order} + return _guess_dims(coords, dim, T(), precision) +end function _guess_dims(coords::AbstractVector, dim::Symbol, precision::Int) dim_vals = _dim_vals(coords, precision) order = _guess_dim_order(dim_vals) @@ -189,16 +191,10 @@ _dim_col_names(table, dims::Tuple) = map(col -> Tables.getcolumn(table, col), DD # Extract data columns from table function _data_cols(table, dims::Tuple) - data_cols = _data_col_names(table, dims) + data_cols = data_col_names(table, dims) return NamedTuple{Tuple(data_cols)}(Tables.getcolumn(table, col) for col in data_cols) end -# Get names of data columns from table -function _data_col_names(table, dims::Tuple) - dim_cols = DD.name(dims) - return filter(x -> !(x in dim_cols), Tables.columnnames(table)) -end - # Determine the ordinality of a set of coordinates _coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DD.Selector) = _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim)) _coords_to_ords(coords::Tuple, dims::Tuple, sel::DD.Selector) = Tuple(_coords_to_ords(c, d, sel) for (c, d) in zip(coords, dims)) @@ -282,12 +278,6 @@ function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, di return rebuild(name2dim(dim), DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) end -_get_column(table, x::Type{<:DD.Dimension}) = Tables.getcolumn(table, DD.name(x)) -_get_column(table, x::DD.Dimension) = Tables.getcolumn(table, DD.name(x)) -_get_column(table, x::Symbol) = Tables.getcolumn(table, x) -_get_column(table, x::Pair) = _get_column(table, first(x)) - - # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) stride = 1 From 
76f8805220e08ce884fc1e3c057c229df1717dcf Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Wed, 19 Mar 2025 17:42:56 +0100 Subject: [PATCH 34/51] Update src/array/array.jl --- src/array/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/array.jl b/src/array/array.jl index f02449401..0c05c5af9 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -433,7 +433,7 @@ end # Write a single column from a table with one or more coordinate columns to a DimArray function DimArray(table, dims; name=NoName(), selector=Near(), precision=6, missingval=missing, kw...) # Confirm that the Tables interface is implemented - Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) + Tables.istable(table) || throw(ArgumentError("`obj` must be an `AbstractArray` or satisfy the `Tables.jl` interface.")) # Get array dimensions dims = guess_dims(table, dims, precision=precision) From ae13b26461cfb6e74d66f79c783c4b0cc5254169 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Mon, 5 May 2025 11:01:10 +0300 Subject: [PATCH 35/51] merge materialize2 --- src/Dimensions/dimension.jl | 1 + src/array/array.jl | 40 ++++-- src/stack/stack.jl | 18 ++- src/table_ops.jl | 274 +++++++++++++++++------------------- test/tables.jl | 122 +++++++++++----- 5 files changed, 261 insertions(+), 194 deletions(-) diff --git a/src/Dimensions/dimension.jl b/src/Dimensions/dimension.jl index 38d07a631..b49dc966f 100644 --- a/src/Dimensions/dimension.jl +++ b/src/Dimensions/dimension.jl @@ -182,6 +182,7 @@ lookuptype(x) = NoLookup name(dim::Dimension) = name(typeof(dim)) name(dim::Val{D}) where D = name(D) name(dim::Type{D}) where D<:Dimension = nameof(D) +name(s::Symbol) = s label(x) = string(name(x)) diff --git a/src/array/array.jl b/src/array/array.jl index 9f37951c2..30a7274ba 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -125,7 +125,8 @@ function Base.NamedTuple(A1::AbstractDimArray, As::AbstractDimArray...) 
end # undef constructor for all AbstractDimArray -(::Type{A})(x::UndefInitializer, dims::Dimension...; kw...) where {A<:AbstractDimArray{<:Any}} = A(x, dims; kw...) +(::Type{A})(x::UndefInitializer, dims::Dimension...; kw...) where {A<:AbstractDimArray{T}} where T = + A(x, dims; kw...) function (::Type{A})(x::UndefInitializer, dims::DimTuple; kw...) where {A<:AbstractDimArray{T}} where T basetypeof(A)(Array{T}(undef, size(dims)), dims; kw...) end @@ -476,15 +477,33 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; name=NoName(), selector=Near(), precision=6, missingval=missing, kw...) +function DimArray(table, dims; kw...) # Confirm that the Tables interface is implemented Tables.istable(table) || throw(ArgumentError("`obj` must be an `AbstractArray` or satisfy the `Tables.jl` interface.")) - - # Get array dimensions - dims = guess_dims(table, dims, precision=precision) - + _dimarray_from_table(table, guess_dims(table, dims); kw...) +end +function DimArray(data::AbstractVector{<:NamedTuple{K}}, dims::Tuple; + refdims=(), name=NoName(), metadata=NoMetadata(), kw... +) where K + if all(map(d -> Dimensions.name(d) in K, dims)) + table = Tables.columns(data) + return _dimarray_from_table(table, guess_dims(table, dims; kw...); + refdims, name, metadata, kw...) + else + return DimArray(data, format(dims, data), refdims, name, metadata) + end +end +# Same as above, but guess dimension names +function DimArray(table; kw...) + # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) + table = Tables.columnaccess(table) ? table : Tables.columns(table) + # Use default dimension + return _dimarray_from_table(table, guess_dims(table; kw...); kw...) 
+end +function _dimarray_from_table(table, dims; name=NoName(), selector=nothing, precision=6, missingval=missing, kw...) # Determine row indices based on coordinate values - indices = coords_to_indices(table, dims; selector=selector) + indices = coords_to_indices(table, dims; selector, atol=10.0^-precision) # Extract the data column correspondong to `name` col = name == NoName() ? data_col_names(table, dims) |> first : Symbol(name) @@ -496,14 +515,7 @@ function DimArray(table, dims; name=NoName(), selector=Near(), precision=6, miss # Return DimArray return DimArray(array, dims, name=col; kw...) end -# Same as above, but guess dimension names -function DimArray(table; kw...) - # Confirm that the Tables interface is implemented - Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) - # Use default dimension - return DimArray(table, guess_dims(table; kw...); kw...) -end """ DimArray(f::Function, dim::Dimension; [name]) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index e5ee923ce..93768c615 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -31,6 +31,8 @@ const AbstractMatrixDimStack = AbstractDimStack{K,T,2} where {K,T} (::Type{T})(st::AbstractDimStack) where T<:AbstractDimArray = T([st[D] for D in DimIndices(st)]; dims=dims(st), metadata=metadata(st)) +# for ambiguity +DimArray(st::AbstractDimStack) = T([st[D] for D in DimIndices(st)]; dims=dims(st), metadata=metadata(st)) data(s::AbstractDimStack) = getfield(s, :data) dims(s::AbstractDimStack) = getfield(s, :dims) @@ -452,17 +454,27 @@ function DimStack(data::NamedTuple, dims::Tuple; layermetadata=map(_ -> NoMetadata(), data), layerdims = map(_ -> basedims(dims), data), ) + # Treat as a table if the dims correspond to data columns. 
+ Tables.istable(data) && all(d -> name(d) in keys(data), dims) && + return _dimstack_from_table(data, dims; refdims, metadata) all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) end DimStack(st::AbstractDimStack) = DimStack(data(st), dims(st), refdims(st), layerdims(st), metadata(st), layermetadata(st)) # Write each column from a table with one or more coordinate columns to a layer in a DimStack -function DimStack(table, dims::Tuple; selector=DimensionalData.Contains(), kw...) +DimStack(table, dims::Tuple; kw...) = _dimstack_from_table(table, dims; kw...) +DimStack(table; kw...) = _dimstack_from_table(table, guess_dims(table); kw...) + +function _dimstack_from_table(table, dims; selector=nothing, precision=6, missingval = missing, kw...) + table = Tables.columnaccess(table) ? table : Tables.columns(table) data_cols = _data_cols(table, dims) + dims = guess_dims(table, dims, precision=precision) indices = coords_to_indices(table, dims; selector=selector) - arrays = [restore_array(d, indices, dims; missingval=missing) for d in values(data_cols)] - return DimStack(NamedTuple{keys(data_cols)}(arrays), dims; kw...) + layers = map(data_cols) do d + restore_array(d, indices, dims, missingval) + end + return DimStack(layers, dims; kw...) end layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) diff --git a/src/table_ops.jl b/src/table_ops.jl index 73b3d2525..0a9026166 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,11 +1,11 @@ """ - restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) + restore_array(data::AbstractVector, indices::AbstractVector{<:NTuple{<:Any, Dim}}, dims::Tuple, missingval) Restore a dimensional array from its tabular representation. # Arguments - `data`: An `AbstractVector` containing the flat data to be written to a `DimArray`. 
-- `indices`: An `AbstractVector` containing the flat indices corresponding to each element in `data`. +- `indices`: An `AbstractVector` containing the dimensional indices corresponding to each element in `data`. - `dims`: The dimensions of the destination `DimArray`. - `missingval`: The value to write for missing elements in `data`. @@ -13,60 +13,58 @@ Restore a dimensional array from its tabular representation. An `Array` containing the ordered valued in `data` with the size specified by `dims`. ``` """ -function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) +function restore_array(data::AbstractVector, indices::AbstractVector, dims::Tuple, missingval) # Allocate Destination Array - dst_size = prod(map(length, dims)) - dst = Vector{eltype(data)}(undef, dst_size) - dst[indices] .= data - - # Handle Missing Rows - _missingval = _cast_missing(data, missingval) - missing_rows = ones(Bool, dst_size) - missing_rows[indices] .= false - data = ifelse.(missing_rows, _missingval, dst) - - # Reshape Array - return reshape(data, size(dims)) + dst = DimArray{eltype(data)}(undef, dims) + for (idx, d) in zip(indices, data) + dst[idx] = d + end + + if length(indices) !== length(dst) + # Handle Missing Rows + _missingval = _cast_missing(data, missingval) + missing_rows = trues(dims) + for idx in indices # looping is faster than broadcasting + missing_rows[idx] = false + end + return ifelse.(missing_rows, _missingval, dst) + end + return dst end """ - coords_to_indices(table, dims; selector=Near()) + coords_to_indices(table, dims; [selector], [atol]) -Return the flat index of each row in `table` based on its associated coordinates. +Return the dimensional index of each row in `table` based on its associated coordinates. Dimension columns are determined from the name of each dimension in `dims`. -It is assumed that the source/destination array has the same dimension order as `dims`. 
# Arguments - `table`: A table representation of a dimensional array. - `dims`: A `Tuple` of `Dimension` corresponding to the source/destination array. -- `selector`: The selector type to use for non-numerical/irregular coordinates. +- `selector`: The selector type to use. This defaults to `Near()` for orderd, sampled dimensions + and `At()` for all other dimensions. +- `atol`: The absolute tolerance to use with `At()`. This defaults to `1e-6`. # Example ```julia -julia> d = DimArray(rand(256, 256), (X, Y)); +julia> d = rand(X(1:256), Y(1:256)); julia> t = DimTable(d); julia> coords_to_indices(t, dims(d)) -65536-element Vector{Int64}: - 1 - 2 - ⋮ - 65535 - 65536 +65536-element Vector{Tuple{X{Int64}, Y{Int64}}}: + (↓ X 1, → Y 1) + (↓ X 2, → Y 1) + (↓ X 3, → Y 1) + (↓ X 4, → Y 1) + ⋮ + (↓ X 254, → Y 256) + (↓ X 255, → Y 256) + (↓ X 256, → Y 256) ``` """ -function coords_to_indices(table, dims::Tuple; selector=DimensionalData.Near()) - return _coords_to_indices(table, dims, selector) -end - -# Find the order of the table's rows according to the coordinate values -_coords_to_indices(table, dims::Tuple, sel::DimensionalData.Selector) = - _coords_to_indices(_dim_cols(table, dims), dims, sel) -function _coords_to_indices(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) - ords = _coords_to_ords(coords, dims, sel) - indices = _ords_to_indices(ords, dims) - return indices +function coords_to_indices(table, dims::Tuple; selector=nothing, atol = 1e-6) + return _coords_to_indices(table, dims, selector, atol) end """ @@ -92,6 +90,10 @@ A tuple containing the inferred dimensions from the table. 
# Example ```julia +julia> using DimensionalData, DataFrames + +julia> import DimensionalData: Lookups, guess_dims + julia> xdims = X(LinRange{Float64}(610000.0, 661180.0, 2560)); julia> ydims = Y(LinRange{Float64}(6.84142e6, 6.79024e6, 2560)); @@ -109,43 +111,41 @@ julia> dims(d) → Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered -julia> DD.guess_dims(t) +julia> guess_dims(t) ↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points, → Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered -julia> DD.guess_dims(t, X, Y, :Band) +julia> guess_dims(t, X, Y, :Band) ↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points, → Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered -julia> DD.guess_dims(t_rand, X => DD.ForwardOrdered, Y => DD.ReverseOrdered, :Band => DD.ForwardOrdered) +julia> guess_dims(t_rand, X => ForwardOrdered, Y => ReverseOrdered, :Band => ForwardOrdered) ↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points, → Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered ``` """ guess_dims(table; kw...) = guess_dims(table, _dim_col_names(table); kw...) -function guess_dims(table, dims::Tuple; precision=6) - map(dim -> _guess_dims(get_column(table, dim), dim, precision), dims) +function guess_dims(table, dims::Tuple; precision=6, kw...) 
+ map(dim -> _guess_dims(get_column(table, name(dim)), dim, precision), dims) end """ - get_column(table, dim::Type{<:DD.Dimension}) - get_column(table, dim::DD.Dimension) + get_column(table, dim::Type{<:Dimension}) + get_column(table, dim::Dimension) get_column(table, dim::Symbol) - get_column(table, dim::Pair) Retrieve the coordinate data stored in the column specified by `dim`. # Arguments - `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. -- `dim`: A single dimension to be retrieved, which may be a `Symbol`, a `Dimension`, or a `Dimension => Order` pair. +- `dim`: A single dimension to be retrieved, which may be a `Symbol`, a `Dimension`. """ -get_column(table, x::Type{<:DD.Dimension}) = Tables.getcolumn(table, DD.name(x)) -get_column(table, x::DD.Dimension) = Tables.getcolumn(table, DD.name(x)) +get_column(table, x::Type{<:Dimension}) = Tables.getcolumn(table, name(x)) +get_column(table, x::Dimension) = Tables.getcolumn(table, name(x)) get_column(table, x::Symbol) = Tables.getcolumn(table, x) -get_column(table, x::Pair) = get_column(table, first(x)) """ data_col_names(table, dims::Tuple) @@ -157,37 +157,43 @@ Return the names of all columns that don't matched the dimensions given by `dims - `dims`: A `Tuple` of one or more `Dimensions`. """ function data_col_names(table, dims::Tuple) - dim_cols = DD.name(dims) + dim_cols = name(dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end -_guess_dims(coords::AbstractVector, dim::DD.Dimension, args...) = dim -_guess_dims(coords::AbstractVector, dim::Type{<:DD.Dimension}, args...) = _guess_dims(coords, DD.name(dim), args...) +_guess_dims(coords::AbstractVector, dim::Type{<:Dimension}, args...) = _guess_dims(coords, name(dim), args...) _guess_dims(coords::AbstractVector, dim::Pair, args...) = _guess_dims(coords, first(dim), last(dim), args...) 
-function _guess_dims(coords::AbstractVector, dim::Symbol, ::Type{T}, precision::Int) where {T <: DD.Order} +function _guess_dims(coords::AbstractVector, dim::Symbol, ::Type{T}, precision::Int) where {T <: Order} return _guess_dims(coords, dim, T(), precision) end function _guess_dims(coords::AbstractVector, dim::Symbol, precision::Int) - dim_vals = _dim_vals(coords, precision) - order = _guess_dim_order(dim_vals) - span = _guess_dim_span(dim_vals, order, precision) - return _build_dim(dim_vals, dim, order, span) + dim_vals = _dim_vals(coords, dim, precision) + return format(Dim{dim}(dim_vals)) +end +function _guess_dims(coords::AbstractVector, dim::Type{<:Dimension}, precision::Int) + dim_vals = _dim_vals(coords, dim, precision) + return format(dim(dim_vals)) end -function _guess_dims(coords::AbstractVector, dim::Symbol, order::DD.Order, precision::Int) - dim_vals = _dim_vals(coords, order, precision) - span = _guess_dim_span(dim_vals, order, precision) - return _build_dim(dim_vals, dim, order, span) +function _guess_dims(coords::AbstractVector, dim::Dimension, precision::Int) + newl = _guess_dims(coords, lookup(dim), precision) + return format(rebuild(dim, newl)) end +function _guess_dims(coords::AbstractVector, l::Lookup, precision::Int) + dim_vals = _dim_vals(coords, l, precision) + return rebuild(l; data = dim_vals) +end +# lookup(dim) could just return a vector - then we keep those values +_guess_dims(coords::AbstractVector, l::AbstractVector, precision::Int) = l # Extract coordinate columns from table function _dim_cols(table, dims::Tuple) - dim_cols = DD.name(dims) + dim_cols = name(dims) return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) end # Extract dimension column names from the given table _dim_col_names(table) = filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)) -_dim_col_names(table, dims::Tuple) = map(col -> Tables.getcolumn(table, col), DD.name(dims)) +_dim_col_names(table, dims::Tuple) = map(col -> 
Tables.getcolumn(table, col), name(dims)) # Extract data columns from table function _data_cols(table, dims::Tuple) @@ -195,98 +201,82 @@ function _data_cols(table, dims::Tuple) return NamedTuple{Tuple(data_cols)}(Tables.getcolumn(table, col) for col in data_cols) end +_coords_to_indices(table, dims::Tuple, sel, atol) = + _coords_to_indices(_dim_cols(table, dims), dims, sel, atol) # Determine the ordinality of a set of coordinates -_coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DD.Selector) = _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim)) -_coords_to_ords(coords::Tuple, dims::Tuple, sel::DD.Selector) = Tuple(_coords_to_ords(c, d, sel) for (c, d) in zip(coords, dims)) -_coords_to_ords(coords::NamedTuple, dims::Tuple, sel::DD.Selector) = _coords_to_ords(map(x -> coords[x], DD.name(dims)), dims, sel) - -# Determine the ordinality of a set of regularly spaced numerical coordinates -function _coords_to_ords( - coords::AbstractVector{<:Real}, - dim::Dimension, - ::DimensionalData.Near, - position::DimensionalData.Position, - span::DimensionalData.Regular) - step = DD.step(span) - float_ords = ((coords .- first(dim)) ./ step) .+ 1 - int_ords = _round_ords(float_ords, position) - return clamp!(int_ords, 1, length(dim)) +function _coords_to_indices(coords::Tuple, dims::Tuple, sel, atol) + map(zip(coords...)) do coords + map(coords, dims) do c, d + _coords_to_indices(c, d, sel, atol) + end + end end - -# Determine the ordinality of a set of categorical or irregular coordinates -function _coords_to_ords( - coords::AbstractVector, - dim::Dimension, - sel::DimensionalData.Selector, - ::DimensionalData.Position, - ::DimensionalData.Span) - return map(c -> DimensionalData.selectindices(dim, rebuild(sel, c)), coords) +_coords_to_indices(coords::NamedTuple, dims::Tuple, sel, atol) = _coords_to_indices(map(x -> coords[x], name(dims)), dims, sel, atol) +# implement some default selectors +_coords_to_indices(coord, dim::Dimension, sel::Nothing, atol) = + 
_coords_to_indices(coord, dim, _default_selector(dim), atol) + +# get indices of the coordinates +_coords_to_indices(coord, dim::Dimension, sel::Selector, atol) = + return rebuild(dim, selectindices(dim, rebuild(sel, coord))) +# get indices of the coordinates +_coords_to_indices(coord, dim::Dimension, sel::At, atol) = + return rebuild(dim, selectindices(dim, rebuild(sel; val = coord, atol))) + +function _default_selector(dim::Dimension{<:AbstractSampled}) + if sampling(dim) isa Intervals + Contains() + elseif isordered(dim) && !(eltype(dim) <: Integer) + Near() + else + At() + end end - -# Round coordinate ordinality to the appropriate integer given the specified locus -_round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.Start) = floor.(Int, ords) -_round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.Center) = round.(Int, ords) -_round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.End) = ceil.(Int, ords) +_default_selector(dim::Dimension{<:AbstractCategorical}) = At() +_default_selector(dim::Dimension) = Near() # Extract dimension value from the given vector of coordinates -_dim_vals(coords::AbstractVector, precision::Int) = _unique_vals(coords, precision) -_dim_vals(coords::AbstractVector, ::DD.Order, precision::Int) = _unique_vals(coords, precision) -_dim_vals(coords::AbstractVector, ::DD.ForwardOrdered, precision::Int) = sort!(_unique_vals(coords, precision)) -_dim_vals(coords::AbstractVector, ::DD.ReverseOrdered, precision::Int) = sort!(_unique_vals(coords, precision), rev=true) +function _dim_vals(coords::AbstractVector, dim, precision::Int) + vals = _unique_vals(coords, precision) + return _maybe_as_range(vals, precision) +end +function _dim_vals(coords::AbstractVector, l::Lookup, precision::Int) + val(l) isa AutoValues || return val(l) # do we want to have some kind of check that the values match? 
+ vals = _unique_vals(coords, precision) + _maybe_order!(vals, order(l)) + return _maybe_as_range(vals, precision) +end +_dim_vals(coords::AbstractVector, l::AbstractVector, precision::Int) = l # same comment as above? + +_maybe_order!(A::AbstractVector, ::Order) = A +_maybe_order!(A::AbstractVector, ::ForwardOrdered) = sort!(A) +_maybe_order!(A::AbstractVector, ::ReverseOrdered) = sort!(A, rev=true) # Extract all unique coordinates from the given vector _unique_vals(coords::AbstractVector, ::Int) = unique(coords) _unique_vals(coords::AbstractVector{<:Real}, precision::Int) = round.(coords, digits=precision) |> unique - -# Determine if the given coordinates are forward ordered, reverse ordered, or unordered -function _guess_dim_order(coords::AbstractVector) - try - if issorted(coords) - return DD.ForwardOrdered() - elseif issorted(coords, rev=true) - return DD.ReverseOrdered() - else - return DD.Unordered() - end - catch - return DD.Unordered() - end -end +_unique_vals(coords::AbstractVector{<:Integer}, ::Int) = unique(coords) # Estimate the span between consecutive coordinates -_guess_dim_span(::AbstractVector, ::DD.Order, ::Int) = DD.Irregular() -function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision::Int) - steps = round.((@view coords[2:end]) .- (@view coords[1:end-1]), digits=precision) - span = argmin(abs, steps) - return all(isinteger, round.(steps ./ span, digits=precision)) ? DD.Regular(span) : DD.Irregular() -end -function _guess_dim_span(coords::AbstractVector{<:Dates.AbstractTime}, ::DD.Ordered, precision::Int) - steps = (@view coords[2:end]) .- (@view coords[1:end-1]) - span = argmin(abs, steps) - return all(isinteger, round.(steps ./ span, digits=precision)) ? 
DD.Regular(span) : DD.Irregular() -end - -function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span) - return rebuild(name2dim(dim), DD.Categorical(vals, order=order)) -end -function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) - return rebuild(name2dim(dim), DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) +_maybe_as_range(A::AbstractVector, precision) = A # for non-numeric types +function _maybe_as_range(A::AbstractVector{<:Real}, precision::Int) + A_r = range(first(A), last(A), length(A)) + atol = 10.0^(-precision) + return all(i -> isapprox(A_r[i], A[i]; atol), eachindex(A)) ? A_r : A end -function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Regular) - n = round(Int, abs((last(vals) - first(vals)) / span.step) + 1) - dim_vals = StepRangeLen(first(vals), span.step, n) - return rebuild(name2dim(dim), DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) -end - -# Determine the index from a tuple of coordinate orders -function _ords_to_indices(ords, dims) - stride = 1 - indices = ones(Int, length(ords[1])) - for (ord, dim) in zip(ords, dims) - indices .+= (ord .- 1) .* stride - stride *= length(dim) +function _maybe_as_range(A::AbstractVector{<:Integer}, precision::Int) + idx1, idxrest = Iterators.peel(eachindex(A)) + step = A[idx1+1] - A[idx1] + for idx in idxrest + A[idx] - A[idx-1] == step || return A end - return indices + return first(A):step:last(A) +end +function _maybe_as_range(A::AbstractVector{<:Dates.AbstractTime}, precision::Int) + steps = (@view A[2:end]) .- (@view A[1:end-1]) + span = argmin(abs, steps) + isregular = all(isinteger, round.(steps ./ span, digits=precision)) + return isregular ? 
range(first(A), last(A), length(A)) : A end _cast_missing(::AbstractArray, missingval::Missing) = missing diff --git a/test/tables.jl b/test/tables.jl index 23ea9eed5..c9c6cf331 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -163,39 +163,91 @@ end t = DataFrame(ds) t1 = Random.shuffle(t) t2 = t[101:end,:] - - # Restore DimArray from shuffled table - @test all(DimArray(t1, dims(ds)) .== a) - @test all(DimArray(t1, dims(ds), name="a") .== a) - @test all(DimArray(t1, dims(ds), name="b") .== b) - @test all(DimArray(t1, dims(ds), name="c") .== c) - - # Restore DimArray from table with missing rows - @test all(DimArray(t2, dims(ds), name="a")[Y(2:100)] .== a[Y(2:100)]) - @test all(DimArray(t2, dims(ds), name="b")[Y(2:100)] .== b[Y(2:100)]) - @test all(DimArray(t2, dims(ds), name="c")[Y(2:100)] .== c[Y(2:100)]) - @test DimArray(t2, dims(ds), name="a")[Y(1)] .|> ismissing |> all - @test DimArray(t2, dims(ds), name="b")[Y(1)] .|> ismissing |> all - @test DimArray(t2, dims(ds), name="c")[Y(1)] .|> ismissing |> all - @test DimArray(t2, dims(ds), name="a")[Y(2:100)] .|> ismissing .|> (!) |> all - @test DimArray(t2, dims(ds), name="b")[Y(2:100)] .|> ismissing .|> (!) |> all - @test DimArray(t2, dims(ds), name="c")[Y(2:100)] .|> ismissing .|> (!) |> all - - # Restore DimStack from shuffled table - restored_stack = DimStack(t1, dims(ds)) - @test all(restored_stack.a .== ds.a) - @test all(restored_stack.b .== ds.b) - @test all(restored_stack.c .== ds.c) - - # Restore DimStack from table with missing rows - restored_stack = DimStack(t2, dims(ds)) - @test all(restored_stack.a[Y(2:100)] .== ds.a[Y(2:100)]) - @test all(restored_stack.b[Y(2:100)] .== ds.b[Y(2:100)]) - @test all(restored_stack.c[Y(2:100)] .== ds.c[Y(2:100)]) - @test restored_stack.a[Y(1)] .|> ismissing |> all - @test restored_stack.b[Y(1)] .|> ismissing |> all - @test restored_stack.c[Y(1)] .|> ismissing |> all - @test restored_stack.a[Y(2:100)] .|> ismissing .|> (!) 
|> all - @test restored_stack.b[Y(2:100)] .|> ismissing .|> (!) |> all - @test restored_stack.c[Y(2:100)] .|> ismissing .|> (!) |> all + t3 = copy(t1) + t3.X .+= rand(nrow(t1)) .* 1e-7 # add some random noise to check if precision works + + tabletypes = (Tables.rowtable, Tables.columntable, DataFrame) + + for type in tabletypes + t = type(t) + t1 = type(t1) + t2 = type(t2) + t3 = type(t3) + @testset "All dimensions passed (using $type)" begin + # Restore DimArray from shuffled table + for table = (t1, t3) + @test all(DimArray(table, dims(ds)) .== a) + @test all(DimArray(table, dims(ds), name="a") .== a) + @test all(DimArray(table, dims(ds), name="b") .== b) + @test all(DimArray(table, dims(ds), name="c") .== c) + end + + # Restore DimArray from table with missing rows + @test all(DimArray(t2, dims(ds), name="a")[Y(2:100)] .== a[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="b")[Y(2:100)] .== b[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="c")[Y(2:100)] .== c[Y(2:100)]) + @test DimArray(t2, dims(ds), name="a")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="b")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="c")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="a")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="b")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="c")[Y(2:100)] .|> ismissing .|> (!) 
|> all + + # Restore DimStack from shuffled table + restored_stack = DimStack(t1, dims(ds)) + @test all(restored_stack.a .== ds.a) + @test all(restored_stack.b .== ds.b) + @test all(restored_stack.c .== ds.c) + + # Restore DimStack from table with missing rows + restored_stack = DimStack(t2, dims(ds)) + @test all(restored_stack.a[Y(2:100)] .== ds.a[Y(2:100)]) + @test all(restored_stack.b[Y(2:100)] .== ds.b[Y(2:100)]) + @test all(restored_stack.c[Y(2:100)] .== ds.c[Y(2:100)]) + @test restored_stack.a[Y(1)] .|> ismissing |> all + @test restored_stack.b[Y(1)] .|> ismissing |> all + @test restored_stack.c[Y(1)] .|> ismissing |> all + @test restored_stack.a[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.b[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.c[Y(2:100)] .|> ismissing .|> (!) |> all + end + + @testset "Dimensions automatically detected (using $type)" begin + da3 = DimArray(t) + # Awkward test, see https://github.com/rafaqz/DimensionalData.jl/issues/953 + # If Dim{:X} == X then we can just test for equality + @test lookup(dims(da3, :X)) == lookup(dims(a, X)) + @test lookup(dims(da3, :Y)) == lookup(dims(a, Y)) + @test parent(da3) == parent(a) + + for table in (t1, t3) + da = DimArray(table) + @test parent(da[X = At(100:-1:1), Y = At(-250:5:249)]) == parent(a) + end + end + + @testset "Dimensions partially specified (using $type)" begin + for table in (t1, t3) + # setting the order returns ordered dimensions + da = DimArray(table, (X(Sampled(order = ReverseOrdered())), Y(Sampled(order=ForwardOrdered())))) + @test dims(da, X) == dims(a, X) + @test dims(da, Y) == dims(a, Y) + end + # passing in dimension types works + @test DimArray(t, (X, Y)) == a + @test parent(DimArray(t, (:X, Y))) == parent(a) + @test parent(DimArray(t, (:X, :Y))) == parent(a) + # passing in dimensions works for unconventional dimension names + A = rand(dimz, name = :a) + table = type(A) + @test DimArray(table, (X, Y(Sampled(span = Irregular())), :test)) == A + # 
Specifying dimensions types works even if it's illogical. + dat = DimArray(t, (X(Sampled(span = Irregular(), order = Unordered())), Y(Categorical()))) + x, y = dims(dat) + @test !isregular(x) + @test !isordered(x) + @test iscategorical(y) + @test isordered(y) # this is automatically detected + end + end end \ No newline at end of file From 95fe3f611eb2985b1031c821d3a081d45ae15914 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 6 May 2025 11:37:55 +0300 Subject: [PATCH 36/51] fix scuffed merge --- src/stack/stack.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 34858d635..16d0c9510 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -490,7 +490,7 @@ function DimStack(das::NamedTuple{<:Any,<:Tuple{Vararg{AbstractDimArray}}}; end DimStack(data::Union{Tuple,AbstractArray,NamedTuple}, dim::Dimension; name=uniquekeys(data), kw...) = DimStack(NamedTuple{Tuple(name)}(data), (dim,); kw...) -DimStack(data::Union{Tuple,AbstractArray}, dims::Tuple; name=uniquekeys(data), kw...) = +DimStack(data::Union{Tuple,AbstractArray{<:AbstractArray}}, dims::Tuple; name=uniquekeys(data), kw...) = DimStack(NamedTuple{Tuple(name)}(data), dims; kw...) 
function DimStack(data::NamedTuple{K}, dims::Tuple; refdims=(), @@ -498,6 +498,8 @@ function DimStack(data::NamedTuple{K}, dims::Tuple; layermetadata=nothing, layerdims=nothing ) where K + Tables.istable(data) && all(d -> name(d) in keys(data), dims) && + return _dimstack_from_table(data, dims; refdims, metadata) layerdims = if isnothing(layerdims) all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() map(_ -> basedims(dims), data) @@ -521,9 +523,8 @@ function DimStack(st::AbstractDimStack; metadata=metadata(st), layermetadata=layermetadata(st), ) + DimStack(data, dims, refdims, layerdims, metadata, layermetadata) end -DimStack(st::AbstractDimStack) = - DimStack(data(st), dims(st), refdims(st), layerdims(st), metadata(st), layermetadata(st)) # Write each column from a table with one or more coordinate columns to a layer in a DimStack DimStack(table, dims::Tuple; kw...) = _dimstack_from_table(table, dims; kw...) DimStack(table; kw...) = _dimstack_from_table(table, guess_dims(table); kw...) 
From 81a32e172ae2b6dd5d5ea10de4aa79e428f5affe Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Wed, 7 May 2025 09:12:39 +0300 Subject: [PATCH 37/51] filter instead of indexing in test for clarity --- test/tables.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tables.jl b/test/tables.jl index c9c6cf331..5c06ab3e2 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -162,7 +162,7 @@ end ds = DimStack((a=a, b=b, c=c)) t = DataFrame(ds) t1 = Random.shuffle(t) - t2 = t[101:end,:] + t2 = filter(r -> r.Y != -250, t) t3 = copy(t1) t3.X .+= rand(nrow(t1)) .* 1e-7 # add some random noise to check if precision works From 7b0f5e823930a5ab8c2b08e93fc7aec03a9b82c5 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 28 Jun 2025 19:09:13 +1000 Subject: [PATCH 38/51] fix DimSlices doc --- docs/src/api/reference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/api/reference.md b/docs/src/api/reference.md index d11667e91..16d905813 100644 --- a/docs/src/api/reference.md +++ b/docs/src/api/reference.md @@ -55,7 +55,7 @@ prune DimIndices DimSelectors DimPoints -DimSlices +DimensionalData.DimSlices ``` ## Tables.jl/TableTraits.jl interface From 5ba06cefe3b0002e9d78673eed18414006787a32 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 28 Jun 2025 19:38:27 +1000 Subject: [PATCH 39/51] fix ambiguities --- src/array/array.jl | 35 ++++++++++++++++++++++------------- src/stack/stack.jl | 28 +++++++++++++++++++--------- src/table_ops.jl | 46 ++++++++++++++++++++++++++++++++-------------- src/tree/tree.jl | 9 +++++++-- test/stack.jl | 6 ++---- 5 files changed, 82 insertions(+), 42 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index c75f7c636..75a893e2f 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -514,32 +514,41 @@ function DimArray(A::AbstractBasicDimArray; newdata = collect(data) DimArray(newdata, format(dims, newdata); refdims, name, metadata) end +# Tables # Write a single column from 
a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; kw...) +function DimArray(table, dims; kw...) # Confirm that the Tables interface is implemented Tables.istable(table) || throw(ArgumentError("`obj` must be an `AbstractArray` or satisfy the `Tables.jl` interface.")) - _dimarray_from_table(table, guess_dims(table, dims); kw...) + dimarray_from_table(table, guess_dims(table, dims); kw...) end +# Same as above, but guess dimension names from scratch +function DimArray(table; kw...) + # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) + table = Tables.columnaccess(table) ? table : Tables.columns(table) + # Use default dimension + return dimarray_from_table(table, guess_dims(table; kw...); kw...) +end +# Special-case NamedTuple tables function DimArray(data::AbstractVector{<:NamedTuple{K}}, dims::Tuple; refdims=(), name=NoName(), metadata=NoMetadata(), kw... ) where K if all(map(d -> Dimensions.name(d) in K, dims)) table = Tables.columns(data) - return _dimarray_from_table(table, guess_dims(table, dims; kw...); - refdims, name, metadata, kw...) + dims = guess_dims(table, dims; kw...) + return dimarray_from_table(table, dims; refdims, name, metadata, kw...) else return DimArray(data, format(dims, data), refdims, name, metadata) end end -# Same as above, but guess dimension names -function DimArray(table; kw...) - # Confirm that the Tables interface is implemented - Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) - table = Tables.columnaccess(table) ? table : Tables.columns(table) - # Use default dimension - return _dimarray_from_table(table, guess_dims(table; kw...); kw...) -end -function _dimarray_from_table(table, dims; name=NoName(), selector=nothing, precision=6, missingval=missing, kw...) 
+ +function dimarray_from_table(table, dims; + name=NoName(), + selector=nothing, + precision=6, + missingval=missing, + kw... +) # Determine row indices based on coordinate values indices = coords_to_indices(table, dims; selector, atol=10.0^-precision) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 2ff0a593d..e4c712871 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -30,9 +30,12 @@ const AbstractVectorDimStack = AbstractDimStack{K,T,1} where {K,T} const AbstractMatrixDimStack = AbstractDimStack{K,T,2} where {K,T} (::Type{T})(st::AbstractDimStack; kw...) where T<:AbstractDimArray = + dimarray_from_dimstack(T, st; kw...) +# For ambiguity +DimArray(st::AbstractDimStack; kw...) = dimarray_from_dimstack(DimArray, st; kw...) + +dimarray_from_dimstack(T, st; kw...) = T([st[D] for D in DimIndices(st)]; dims=dims(st), metadata=metadata(st), kw...) -# for ambiguity -DimArray(st::AbstractDimStack) = T([st[D] for D in DimIndices(st)]; dims=dims(st), metadata=metadata(st)) data(s::AbstractDimStack) = getfield(s, :data) dims(s::AbstractDimStack) = getfield(s, :dims) @@ -103,7 +106,7 @@ and an existing stack. # Keywords -Keywords are simply the fields of the stack object: +Keywords are simply the common fields of an `AbstractDimStack` object: - `data` - `dims` @@ -111,6 +114,8 @@ Keywords are simply the fields of the stack object: - `metadata` - `layerdims` - `layermetadata` + +There is no promise that these keywords will be used in all cases. """ function rebuild_from_arrays( s::AbstractDimStack{Keys}, das::Tuple{Vararg{AbstractBasicDimArray}}; kw... @@ -513,7 +518,7 @@ function DimStack(das::NamedTuple{<:Any,<:Tuple{Vararg{AbstractDimArray}}}; ) # Treat as a table if the dims correspond to data columns. 
Tables.istable(data) && all(d -> name(d) in keys(data), dims) && - return _dimstack_from_table(data, dims; refdims, metadata) + return dimstack_from_table(data, dims; refdims, metadata) all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() DimStack(data, dims, refdims, layerdims, metadata, layermetadata) end @@ -528,7 +533,7 @@ function DimStack(data::NamedTuple{K}, dims::Tuple; layerdims=nothing ) where K Tables.istable(data) && all(d -> name(d) in keys(data), dims) && - return _dimstack_from_table(data, dims; refdims, metadata) + return dimstack_from_table(data, dims; refdims, metadata) layerdims = if isnothing(layerdims) all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() map(_ -> basedims(dims), data) @@ -555,10 +560,15 @@ function DimStack(st::AbstractDimStack; DimStack(data, dims, refdims, layerdims, metadata, layermetadata) end # Write each column from a table with one or more coordinate columns to a layer in a DimStack -DimStack(table, dims::Tuple; kw...) = _dimstack_from_table(table, dims; kw...) -DimStack(table; kw...) = _dimstack_from_table(table, guess_dims(table); kw...) +DimStack(table, dims::Tuple; kw...) = dimstack_from_table(table, dims; kw...) +DimStack(table; kw...) = dimstack_from_table(table, guess_dims(table); kw...) -function _dimstack_from_table(table, dims; selector=nothing, precision=6, missingval = missing, kw...) +function dimstack_from_table(table, dims; + selector=nothing, + precision=6, + missingval=missing, + kw... +) table = Tables.columnaccess(table) ? table : Tables.columns(table) data_cols = _data_cols(table, dims) dims = guess_dims(table, dims, precision=precision) @@ -569,4 +579,4 @@ function _dimstack_from_table(table, dims; selector=nothing, precision=6, missin return DimStack(layers, dims; kw...) 
end -layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) \ No newline at end of file +layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) diff --git a/src/table_ops.jl b/src/table_ops.jl index 0a9026166..d55ac2be7 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,15 +1,22 @@ + +const TABLE_ARGUMENT = """ +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. +""" + """ restore_array(data::AbstractVector, indices::AbstractVector{<:NTuple{<:Any, Dim}}, dims::Tuple, missingval) Restore a dimensional array from its tabular representation. # Arguments + - `data`: An `AbstractVector` containing the flat data to be written to a `DimArray`. - `indices`: An `AbstractVector` containing the dimensional indices corresponding to each element in `data`. - `dims`: The dimensions of the destination `DimArray`. - `missingval`: The value to write for missing elements in `data`. # Returns + An `Array` containing the ordered valued in `data` with the size specified by `dims`. ``` """ @@ -33,19 +40,24 @@ function restore_array(data::AbstractVector, indices::AbstractVector, dims::Tupl end """ - coords_to_indices(table, dims; [selector], [atol]) + coords_to_indices(table, dims; [selector, atol]) Return the dimensional index of each row in `table` based on its associated coordinates. Dimension columns are determined from the name of each dimension in `dims`. # Arguments -- `table`: A table representation of a dimensional array. + +$TABLE_ARGUMENT - `dims`: A `Tuple` of `Dimension` corresponding to the source/destination array. + +# Keywords + - `selector`: The selector type to use. This defaults to `Near()` for orderd, sampled dimensions and `At()` for all other dimensions. - `atol`: The absolute tolerance to use with `At()`. This defaults to `1e-6`. 
# Example + ```julia julia> d = rand(X(1:256), Y(1:256)); @@ -63,9 +75,8 @@ julia> coords_to_indices(t, dims(d)) (↓ X 256, → Y 256) ``` """ -function coords_to_indices(table, dims::Tuple; selector=nothing, atol = 1e-6) - return _coords_to_indices(table, dims, selector, atol) -end +coords_to_indices(table, dims::Tuple; selector=nothing, atol=1e-6) = + _coords_to_indices(table, dims, selector, atol) """ guess_dims(table; kw...) @@ -74,21 +85,25 @@ end Guesses the dimensions of an array based on the provided tabular representation. # Arguments -- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. + +$TABLE_ARGUMENT The dimensions will be inferred from the corresponding coordinate collumns in the table. + - `dims`: One or more dimensions to be inferred. If no dimensions are specified, then `guess_dims` will default to any available dimensions in the set `(:X, :Y, :Z, :Ti, :Band)`. Dimensions can be given as either a singular value or as a `Pair` with both the dimensions and corresponding order. The order will be inferred from the data when none is given. This should work for sorted coordinates, but will not be sufficient when the table's rows are out of order. -# Keyword Arguments +# Keywords + - `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`). # Returns A tuple containing the inferred dimensions from the table. # Example + ```julia julia> using DimensionalData, DataFrames @@ -128,9 +143,8 @@ julia> guess_dims(t_rand, X => ForwardOrdered, Y => ReverseOrdered, :Band => For ``` """ guess_dims(table; kw...) = guess_dims(table, _dim_col_names(table); kw...) -function guess_dims(table, dims::Tuple; precision=6, kw...) +guess_dims(table, dims::Tuple; precision=6, kw...) 
= map(dim -> _guess_dims(get_column(table, name(dim)), dim, precision), dims) -end """ get_column(table, dim::Type{<:Dimension}) @@ -140,7 +154,8 @@ end Retrieve the coordinate data stored in the column specified by `dim`. # Arguments -- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. + +$TABLE_ARGUMENT - `dim`: A single dimension to be retrieved, which may be a `Symbol`, a `Dimension`. """ get_column(table, x::Type{<:Dimension}) = Tables.getcolumn(table, name(x)) @@ -153,7 +168,8 @@ get_column(table, x::Symbol) = Tables.getcolumn(table, x) Return the names of all columns that don't matched the dimensions given by `dims`. # Arguments -- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. + +$TABLE_ARGUMENT - `dims`: A `Tuple` of one or more `Dimensions`. """ function data_col_names(table, dims::Tuple) @@ -161,8 +177,10 @@ function data_col_names(table, dims::Tuple) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end -_guess_dims(coords::AbstractVector, dim::Type{<:Dimension}, args...) = _guess_dims(coords, name(dim), args...) -_guess_dims(coords::AbstractVector, dim::Pair, args...) = _guess_dims(coords, first(dim), last(dim), args...) +_guess_dims(coords::AbstractVector, dim::Type{<:Dimension}, args...) = + _guess_dims(coords, name(dim), args...) +_guess_dims(coords::AbstractVector, dim::Pair, args...) = + _guess_dims(coords, first(dim), last(dim), args...) 
function _guess_dims(coords::AbstractVector, dim::Symbol, ::Type{T}, precision::Int) where {T <: Order} return _guess_dims(coords, dim, T(), precision) end @@ -286,4 +304,4 @@ function _cast_missing(::AbstractArray{T}, missingval) where {T} catch e return missingval end -end \ No newline at end of file +end diff --git a/src/tree/tree.jl b/src/tree/tree.jl index 33c6cdc03..749016298 100644 --- a/src/tree/tree.jl +++ b/src/tree/tree.jl @@ -58,7 +58,12 @@ layerdims(dt::AbstractDimTree, key::Symbol) = layerdims(dt)[key] layers(dt::AbstractDimTree) = DataDict((pn => dt[pn] for pn in keys(dt))) # DimStack constructors on DimTree -function (::Type{T})(dt::AbstractDimTree; keep=nothing) where {T<:AbstractDimStack} +# If this method has ambiguities, define it for the DimStack type and call stack_from_tree +(::Type{T})(dt::AbstractDimTree; kw...) where {T<:AbstractDimStack} = + stack_from_tree(T, dt; kw...) +DimStack(dt::AbstractDimTree; kw...) = stack_from_tree(T, dt; kw...) + +function stack_from_tree(T, dt; keep=nothing) if isnothing(keep) pruned = DD.prune(dt; keep) T(pruned[Tuple(keys(pruned))]) @@ -430,4 +435,4 @@ end layerdims(layers::AbstractArray{<:Pair}) = TupleDict(map(((k, v),) -> k => basedims(v), layers)) layermetadata(layers::AbstractArray{<:Pair}) = - DataDict(map(((k, v),) -> k => metadata(v), layers)) \ No newline at end of file + DataDict(map(((k, v),) -> k => metadata(v), layers)) diff --git a/test/stack.jl b/test/stack.jl index f200a49cb..acdaa1ce2 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -34,9 +34,7 @@ mixed = DimStack(da1, da2, da4) DimStack((da1[:, 1], da2[:, 1], da3[:, 1]); name=(:one, :two, :three)) == DimStack(da1[:, 1], da2[:, 1], da3[:, 1]; name=(:one, :two, :three)) == DimStack(parent.([da1[:, 1], da2[:, 1], da3[:, 1]]), dimz[1]; name=(:one, :two, :three)) == s[:, 1] - @test dims(DimStack()) == dims(DimStack(NamedTuple())) == - dims(DimStack(())) == dims(DimStack(DimArray[])) == - dims(DimStack((), ())) == dims(DimStack(Array[], ())) == 
() + @test dims(DimStack()) == dims(DimStack(())) == dims(DimStack(DimArray[])) == dims(DimStack((), ())) == dims(DimStack(Array[], ())) == () @test DimStack([A, 2A, 3A], (Z(), Ti()); name=(:one, :two, :three), layerdims=[(Z(), Ti()), (Z(), Ti()), (Z(), Ti())]) == DimStack((A, 2A, 3A), (Z(), Ti()); name=(:one, :two, :three), layerdims=(one=(Z(), Ti()), two=(Z(), Ti()), three=(Z(), Ti()))) == DimStack((one=A, two=2A, three=3A), (Z(), Ti()); layerdims=[(Z(), Ti()), (Z(), Ti()), (Z(), Ti())]) == @@ -396,4 +394,4 @@ end @test ds[Z = 1] == (a = da1, b = da1) @test ds[Z = 1:2] == ds -end \ No newline at end of file +end From b2e99a53fbf51e27f4b88cbfe802e71c71f37da7 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 28 Jun 2025 20:20:39 +1000 Subject: [PATCH 40/51] bugfixes --- src/stack/stack.jl | 18 ++++++++---------- src/tree/tree.jl | 8 ++++---- test/stack.jl | 3 ++- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index e4c712871..6cb99625f 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -516,11 +516,7 @@ function DimStack(das::NamedTuple{<:Any,<:Tuple{Vararg{AbstractDimArray}}}; metadata=NoMetadata(), layermetadata=map(DD.metadata, das) ) - # Treat as a table if the dims correspond to data columns. - Tables.istable(data) && all(d -> name(d) in keys(data), dims) && - return dimstack_from_table(data, dims; refdims, metadata) - all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() - DimStack(data, dims, refdims, layerdims, metadata, layermetadata) + return DimStack(data, dims, refdims, layerdims, metadata, layermetadata) end DimStack(data::Union{Tuple,AbstractArray,NamedTuple}, dim::Dimension; name=uniquekeys(data), kw...) = DimStack(NamedTuple{Tuple(name)}(data), (dim,); kw...) 
@@ -532,8 +528,9 @@ function DimStack(data::NamedTuple{K}, dims::Tuple; layermetadata=nothing, layerdims=nothing ) where K - Tables.istable(data) && all(d -> name(d) in keys(data), dims) && + if length(data) > 0 && Tables.istable(data) && all(d -> name(d) in keys(data), dims) return dimstack_from_table(data, dims; refdims, metadata) + end layerdims = if isnothing(layerdims) all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() map(_ -> basedims(dims), data) @@ -560,8 +557,9 @@ function DimStack(st::AbstractDimStack; DimStack(data, dims, refdims, layerdims, metadata, layermetadata) end # Write each column from a table with one or more coordinate columns to a layer in a DimStack -DimStack(table, dims::Tuple; kw...) = dimstack_from_table(table, dims; kw...) -DimStack(table; kw...) = dimstack_from_table(table, guess_dims(table); kw...) +DimStack(table, dims::Tuple; kw...) = + dimstack_from_table(table, guess_dims(table, dims; kw...); kw...) +DimStack(table; kw...) = dimstack_from_table(table, guess_dims(table; kw...); kw...) function dimstack_from_table(table, dims; selector=nothing, @@ -571,8 +569,8 @@ function dimstack_from_table(table, dims; ) table = Tables.columnaccess(table) ? 
table : Tables.columns(table) data_cols = _data_cols(table, dims) - dims = guess_dims(table, dims, precision=precision) - indices = coords_to_indices(table, dims; selector=selector) + dims = guess_dims(table, dims; precision) + indices = coords_to_indices(table, dims; selector) layers = map(data_cols) do d restore_array(d, indices, dims, missingval) end diff --git a/src/tree/tree.jl b/src/tree/tree.jl index 749016298..61c9ae6c5 100644 --- a/src/tree/tree.jl +++ b/src/tree/tree.jl @@ -58,12 +58,12 @@ layerdims(dt::AbstractDimTree, key::Symbol) = layerdims(dt)[key] layers(dt::AbstractDimTree) = DataDict((pn => dt[pn] for pn in keys(dt))) # DimStack constructors on DimTree -# If this method has ambiguities, define it for the DimStack type and call stack_from_tree +# If this method has ambiguities, define it for the DimStack type and call dimstack_from_tree (::Type{T})(dt::AbstractDimTree; kw...) where {T<:AbstractDimStack} = - stack_from_tree(T, dt; kw...) -DimStack(dt::AbstractDimTree; kw...) = stack_from_tree(T, dt; kw...) + dimstack_from_tree(T, dt; kw...) +DimStack(dt::AbstractDimTree; kw...) = dimstack_from_tree(DimStack, dt; kw...) 
-function stack_from_tree(T, dt; keep=nothing) +function dimstack_from_tree(T, dt; keep=nothing) if isnothing(keep) pruned = DD.prune(dt; keep) T(pruned[Tuple(keys(pruned))]) diff --git a/test/stack.jl b/test/stack.jl index acdaa1ce2..21355943f 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -34,7 +34,8 @@ mixed = DimStack(da1, da2, da4) DimStack((da1[:, 1], da2[:, 1], da3[:, 1]); name=(:one, :two, :three)) == DimStack(da1[:, 1], da2[:, 1], da3[:, 1]; name=(:one, :two, :three)) == DimStack(parent.([da1[:, 1], da2[:, 1], da3[:, 1]]), dimz[1]; name=(:one, :two, :three)) == s[:, 1] - @test dims(DimStack()) == dims(DimStack(())) == dims(DimStack(DimArray[])) == dims(DimStack((), ())) == dims(DimStack(Array[], ())) == () + @test dims(DimStack()) == dims(DimStack(())) == dims(DimStack(DimArray[])) == + dims(DimStack(NamedTuple())) == dims(DimStack((), ())) == dims(DimStack(Array[], ())) == () @test DimStack([A, 2A, 3A], (Z(), Ti()); name=(:one, :two, :three), layerdims=[(Z(), Ti()), (Z(), Ti()), (Z(), Ti())]) == DimStack((A, 2A, 3A), (Z(), Ti()); name=(:one, :two, :three), layerdims=(one=(Z(), Ti()), two=(Z(), Ti()), three=(Z(), Ti()))) == DimStack((one=A, two=2A, three=3A), (Z(), Ti()); layerdims=[(Z(), Ti()), (Z(), Ti()), (Z(), Ti())]) == From 12daf7a7a0f22b0db113cae696f82264c3a58f87 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Sat, 28 Jun 2025 17:44:02 +0200 Subject: [PATCH 41/51] do checks and call Tables.columns before constructing stack from table --- src/stack/stack.jl | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 6cb99625f..c858cad95 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -556,10 +556,35 @@ function DimStack(st::AbstractDimStack; ) DimStack(data, dims, refdims, layerdims, metadata, layermetadata) end + # Write each column from a table with one or more coordinate columns to a layer in a DimStack -DimStack(table, dims::Tuple; kw...) 
= - dimstack_from_table(table, guess_dims(table, dims; kw...); kw...) -DimStack(table; kw...) = dimstack_from_table(table, guess_dims(table; kw...); kw...) +function DimStack(data, dims::Tuple; kw... +) + if Tables.istable(data) + table = Tables.columns(data) + all(map(d -> Dimensions.name(d) in keys(table), dims)) || throw(ArgumentError( + "All dimensions in dims must be in the table columns." + )) + dims = guess_dims(table, dims; kw...) + return dimstack_from_table(table, dims; kw...) + else + throw(ArgumentError( + """data must be a table with coordinate columns, an AbstractArray, + or a Tuple or NamedTuple of AbstractArrays""" + )) + + end +end +function DimStack(table; kw...) + if Tables.istable(table) + table = Tables.columns(table) + dimstack_from_table(table, guess_dims(table; kw...); kw...) + else + throw(ArgumentError( + """data must be a table with coordinate columns, an AbstractArray, + or a Tuple or NamedTuple of AbstractArrays""" + )) end +end function dimstack_from_table(table, dims; selector=nothing, From df37668f1b9f65821e43408557a0ec8eda0b3420 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Sat, 28 Jun 2025 17:45:10 +0200 Subject: [PATCH 42/51] test dimensions are automatically detected when constructing dimstack --- test/tables.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/tables.jl b/test/tables.jl index 822c3fabd..3b7a47994 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -230,6 +230,10 @@ end for table in (t1, t3) da = DimArray(table) @test parent(da[X = At(100:-1:1), Y = At(-250:5:249)]) == parent(a) + ds_ = DimStack(table) + @test keys(ds_) == (:a, :b, :c) + @test parent(ds_.a[X = At(100:-1:1), Y = At(-250:5:249)]) == parent(a) + end end From 1573bc2532f9fb7e1ec2b926bbcdcd23a5278d7c Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Mon, 30 Jun 2025 09:25:18 +0200 Subject: [PATCH 43/51] comments not docstrings for internals --- src/table_ops.jl | 108 +++++------------------------------------------ 1 file changed, 10 
insertions(+), 98 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index d55ac2be7..588cab255 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,15 +1,6 @@ - -const TABLE_ARGUMENT = """ -- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. -""" - -""" - restore_array(data::AbstractVector, indices::AbstractVector{<:NTuple{<:Any, Dim}}, dims::Tuple, missingval) - +#= Restore a dimensional array from its tabular representation. -# Arguments - - `data`: An `AbstractVector` containing the flat data to be written to a `DimArray`. - `indices`: An `AbstractVector` containing the dimensional indices corresponding to each element in `data`. - `dims`: The dimensions of the destination `DimArray`. @@ -18,8 +9,7 @@ Restore a dimensional array from its tabular representation. # Returns An `Array` containing the ordered valued in `data` with the size specified by `dims`. -``` -""" +=# function restore_array(data::AbstractVector, indices::AbstractVector, dims::Tuple, missingval) # Allocate Destination Array dst = DimArray{eltype(data)}(undef, dims) @@ -39,7 +29,7 @@ function restore_array(data::AbstractVector, indices::AbstractVector, dims::Tupl return dst end -""" +#= coords_to_indices(table, dims; [selector, atol]) Return the dimensional index of each row in `table` based on its associated coordinates. @@ -47,7 +37,7 @@ Dimension columns are determined from the name of each dimension in `dims`. # Arguments -$TABLE_ARGUMENT +- a table - `dims`: A `Tuple` of `Dimension` corresponding to the source/destination array. # Keywords @@ -55,30 +45,11 @@ $TABLE_ARGUMENT - `selector`: The selector type to use. This defaults to `Near()` for orderd, sampled dimensions and `At()` for all other dimensions. - `atol`: The absolute tolerance to use with `At()`. This defaults to `1e-6`. 
- -# Example - -```julia -julia> d = rand(X(1:256), Y(1:256)); - -julia> t = DimTable(d); - -julia> coords_to_indices(t, dims(d)) -65536-element Vector{Tuple{X{Int64}, Y{Int64}}}: - (↓ X 1, → Y 1) - (↓ X 2, → Y 1) - (↓ X 3, → Y 1) - (↓ X 4, → Y 1) - ⋮ - (↓ X 254, → Y 256) - (↓ X 255, → Y 256) - (↓ X 256, → Y 256) -``` -""" +=# coords_to_indices(table, dims::Tuple; selector=nothing, atol=1e-6) = _coords_to_indices(table, dims, selector, atol) -""" +#= guess_dims(table; kw...) guess_dims(table, dims; precision=6) @@ -86,7 +57,7 @@ Guesses the dimensions of an array based on the provided tabular representation. # Arguments -$TABLE_ARGUMENT +- a table The dimensions will be inferred from the corresponding coordinate collumns in the table. - `dims`: One or more dimensions to be inferred. If no dimensions are specified, then `guess_dims` will default @@ -101,77 +72,18 @@ out of order. # Returns A tuple containing the inferred dimensions from the table. - -# Example - -```julia -julia> using DimensionalData, DataFrames - -julia> import DimensionalData: Lookups, guess_dims - -julia> xdims = X(LinRange{Float64}(610000.0, 661180.0, 2560)); - -julia> ydims = Y(LinRange{Float64}(6.84142e6, 6.79024e6, 2560)); - -julia> bdims = Dim{:Band}([:B02, :B03, :B04]); - -julia> d = DimArray(rand(UInt16, 2560, 2560, 3), (xdims, ydims, bdims)); - -julia> t = DataFrame(d); - -julia> t_rand = Random.shuffle(t); - -julia> dims(d) -↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, -→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, -↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered - -julia> guess_dims(t) -↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points, -→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, -↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered - -julia> guess_dims(t, X, Y, :Band) -↓ X Sampled{Float64} 
610000.0:20.0:661180.0 ForwardOrdered Regular Points, -→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, -↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered - -julia> guess_dims(t_rand, X => ForwardOrdered, Y => ReverseOrdered, :Band => ForwardOrdered) -↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points, -→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, -↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered -``` -""" +=# guess_dims(table; kw...) = guess_dims(table, _dim_col_names(table); kw...) guess_dims(table, dims::Tuple; precision=6, kw...) = map(dim -> _guess_dims(get_column(table, name(dim)), dim, precision), dims) -""" - get_column(table, dim::Type{<:Dimension}) - get_column(table, dim::Dimension) - get_column(table, dim::Symbol) - -Retrieve the coordinate data stored in the column specified by `dim`. - -# Arguments - -$TABLE_ARGUMENT -- `dim`: A single dimension to be retrieved, which may be a `Symbol`, a `Dimension`. -""" +#Retrieve the coordinate data stored in the column specified by `dim`. get_column(table, x::Type{<:Dimension}) = Tables.getcolumn(table, name(x)) get_column(table, x::Dimension) = Tables.getcolumn(table, name(x)) get_column(table, x::Symbol) = Tables.getcolumn(table, x) -""" - data_col_names(table, dims::Tuple) - -Return the names of all columns that don't matched the dimensions given by `dims`. - -# Arguments -$TABLE_ARGUMENT -- `dims`: A `Tuple` of one or more `Dimensions`. -""" +#Return the names of all columns that don't match the dimensions given by `dims`. 
function data_col_names(table, dims::Tuple) dim_cols = name(dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) From 28a252f86c4c59717e21a6a6fbf5f8a630ad0d65 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Mon, 30 Jun 2025 09:31:46 +0200 Subject: [PATCH 44/51] check for columnaccess if dims are passed --- src/array/array.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/array/array.jl b/src/array/array.jl index 75a893e2f..1eafb2e06 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -519,6 +519,7 @@ end function DimArray(table, dims; kw...) # Confirm that the Tables interface is implemented Tables.istable(table) || throw(ArgumentError("`obj` must be an `AbstractArray` or satisfy the `Tables.jl` interface.")) + table = Tables.columnaccess(table) ? table : Tables.columns(table) dimarray_from_table(table, guess_dims(table, dims); kw...) end # Same as above, but guess dimension names from scratch From aec86a9600d6ceb3af0aaa7dadc0f051c638a87d Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Mon, 30 Jun 2025 09:33:58 +0200 Subject: [PATCH 45/51] add type argument to dimarray_from_table --- src/array/array.jl | 12 ++++++------ src/stack/stack.jl | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index 1eafb2e06..6eb1e0b11 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -520,7 +520,7 @@ function DimArray(table, dims; kw...) # Confirm that the Tables interface is implemented Tables.istable(table) || throw(ArgumentError("`obj` must be an `AbstractArray` or satisfy the `Tables.jl` interface.")) table = Tables.columnaccess(table) ? table : Tables.columns(table) - dimarray_from_table(table, guess_dims(table, dims); kw...) + dimarray_from_table(DimArray, table, guess_dims(table, dims); kw...) end # Same as above, but guess dimension names from scratch function DimArray(table; kw...) @@ -528,7 +528,7 @@ function DimArray(table; kw...) 
Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) table = Tables.columnaccess(table) ? table : Tables.columns(table) # Use default dimension - return dimarray_from_table(table, guess_dims(table; kw...); kw...) + return dimarray_from_table(DimArray, table, guess_dims(table; kw...); kw...) end # Special-case NamedTuple tables function DimArray(data::AbstractVector{<:NamedTuple{K}}, dims::Tuple; @@ -537,19 +537,19 @@ function DimArray(data::AbstractVector{<:NamedTuple{K}}, dims::Tuple; if all(map(d -> Dimensions.name(d) in K, dims)) table = Tables.columns(data) dims = guess_dims(table, dims; kw...) - return dimarray_from_table(table, dims; refdims, name, metadata, kw...) + return dimarray_from_table(DimArray, table, dims; refdims, name, metadata, kw...) else return DimArray(data, format(dims, data), refdims, name, metadata) end end -function dimarray_from_table(table, dims; +function dimarray_from_table(::Type{T}, table, dims; name=NoName(), selector=nothing, precision=6, missingval=missing, kw... -) +) where T <: AbstractDimArray # Determine row indices based on coordinate values indices = coords_to_indices(table, dims; selector, atol=10.0^-precision) @@ -561,7 +561,7 @@ function dimarray_from_table(table, dims; array = restore_array(data, indices, dims, missingval) # Return DimArray - return DimArray(array, dims, name=col; kw...) + return T(array, dims, name=col; kw...) 
end """ diff --git a/src/stack/stack.jl b/src/stack/stack.jl index c858cad95..80abc911b 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -529,7 +529,7 @@ function DimStack(data::NamedTuple{K}, dims::Tuple; layerdims=nothing ) where K if length(data) > 0 && Tables.istable(data) && all(d -> name(d) in keys(data), dims) - return dimstack_from_table(data, dims; refdims, metadata) + return dimstack_from_table(DimStack, data, dims; refdims, metadata) end layerdims = if isnothing(layerdims) all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() @@ -566,7 +566,7 @@ function DimStack(data, dims::Tuple; kw... "All dimensions in dims must be in the table columns." )) dims = guess_dims(table, dims; kw...) - return dimstack_from_table(table, dims; kw...) + return dimstack_from_table(DimStack, table, dims; kw...) else throw(ArgumentError( """data must be a table with coordinate columns, an AbstractArray, @@ -578,7 +578,7 @@ end function DimStack(table; kw...) if Tables.istable(table) table = Tables.columns(table) - dimstack_from_table(table, guess_dims(table; kw...); kw...) + dimstack_from_table(DimStack, table, guess_dims(table; kw...); kw...) else throw(ArgumentError( """data must be a table with coordinate columns, an AbstractArray, @@ -586,12 +586,12 @@ function DimStack(table; kw...) )) end end -function dimstack_from_table(table, dims; +function dimstack_from_table(::Type{T}, table, dims; selector=nothing, precision=6, missingval=missing, kw... -) +) where T<:AbstractDimStack table = Tables.columnaccess(table) ? table : Tables.columns(table) data_cols = _data_cols(table, dims) dims = guess_dims(table, dims; precision) @@ -599,7 +599,7 @@ function dimstack_from_table(table, dims; layers = map(data_cols) do d restore_array(d, indices, dims, missingval) end - return DimStack(layers, dims; kw...) + return T(layers, dims; kw...) 
end layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) From 305ab4d90339a6f744fab5c6a4d80b724873a3c5 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Fri, 4 Jul 2025 17:15:16 +0200 Subject: [PATCH 46/51] allow passing name to DimStack --- src/stack/stack.jl | 8 +++++--- src/table_ops.jl | 6 ------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 80abc911b..883df4e8c 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -587,19 +587,21 @@ function DimStack(table; kw...) end function dimstack_from_table(::Type{T}, table, dims; + name=nothing, selector=nothing, precision=6, missingval=missing, kw... ) where T<:AbstractDimStack table = Tables.columnaccess(table) ? table : Tables.columns(table) - data_cols = _data_cols(table, dims) + data_cols = isnothing(name) ? data_col_names(table, dims) : name dims = guess_dims(table, dims; precision) indices = coords_to_indices(table, dims; selector) - layers = map(data_cols) do d + layers = map(data_cols) do col + d = Tables.getcolumn(table, col) restore_array(d, indices, dims, missingval) end - return T(layers, dims; kw...) + return T(layers, dims; name = data_cols, kw...) 
end layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) diff --git a/src/table_ops.jl b/src/table_ops.jl index 588cab255..baf45340a 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -125,12 +125,6 @@ end _dim_col_names(table) = filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)) _dim_col_names(table, dims::Tuple) = map(col -> Tables.getcolumn(table, col), name(dims)) -# Extract data columns from table -function _data_cols(table, dims::Tuple) - data_cols = data_col_names(table, dims) - return NamedTuple{Tuple(data_cols)}(Tables.getcolumn(table, col) for col in data_cols) -end - _coords_to_indices(table, dims::Tuple, sel, atol) = _coords_to_indices(_dim_cols(table, dims), dims, sel, atol) # Determine the ordinality of a set of coordinates From dad3bc8cf58fa47d80cf1e6da08349f782ad2753 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Fri, 4 Jul 2025 17:16:56 +0200 Subject: [PATCH 47/51] add a section to the documentation --- docs/src/tables.md | 72 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/docs/src/tables.md b/docs/src/tables.md index d872ccce8..498f242c7 100644 --- a/docs/src/tables.md +++ b/docs/src/tables.md @@ -2,12 +2,22 @@ [Tables.jl](https://github.com/JuliaData/Tables.jl) provides an ecosystem-wide interface to tabular data in Julia, ensuring interoperability with [DataFrames.jl](https://dataframes.juliadata.org/stable/), [CSV.jl](https://csv.juliadata.org/stable/), and hundreds of other packages that implement the standard. +## Dimensional data are tables DimensionalData.jl implements the Tables.jl interface for `AbstractDimArray` and `AbstractDimStack`. `DimStack` layers are unrolled so they are all the same size, and dimensions loop to match the length of the largest layer. Columns are given the [`name`](@ref) of the array or stack layer, and the result of `DD.name(dimension)` for `Dimension` columns. 
-Looping of dimensions and stack layers is done _lazily_, -and does not allocate unless collected. +Looping of dimensions and stack layers is done _lazily_, and does not allocate unless collected. + +## Materializing tables to DimArray or DimStack +`DimArray` and `DimStack` have fallback methods to materialize any `Tables.jl`-compatible table. + +By default, it will treat columns such as X, Y, Z, and Band as dimensions, and other columns as data. +Pass a `name` keyword argument to determine which column(s) are used. + +You have full control over which columns are dimensions - and what those dimensions look like exactly. If you pass a `Tuple` of `Symbol` or dimension types (e.g. `X`) as the second argument, those columns are treated as dimensions. Passing a `Tuple` of dimensions preserves these dimensions - with values matched to the corresponding columns. + +Materializing tables will work even if the table is not ordered, and can handle missing values. ## Example @@ -89,3 +99,61 @@ using CSV CSV.write("dimstack.csv", st) readlines("dimstack.csv") ```` + +## Converting a DataFrame to a DimArray or DimStack + +The DataFrame we use will have 5 columns: X, Y, category, data1, and data2 + +````@ansi dataframe +df = DataFrame(st) +```` + +::: tabs + +== Create a `DimArray` + +Converting this DataFrame to a DimArray without other arguments will read the `category` column as data and ignore data1 and data2: + +````@ansi dataframe +DimArray(df) +```` + +Specify dimension names to ensure these get treated as dimensions. Now data1 is read in instead. +````@ansi dataframe +DimArray(df, (X,Y,:category)) +```` + +You can also pass in the actual dimensions. +````@ansi dataframe +DimArray(df, dims(st)) +```` + +Pass in a name argument to read in data2 instead. 
+````@ansi dataframe +DimArray(df, dims(st); name = :data2) +```` + +== Create a `DimStack` + +Converting the DataFrame to a `DimStack` will by default read category, data1, and data2 as layers +````@ansi dataframe +DimStack(df) +```` + + +Specify dimension names to ensure these get treated as dimensions. Now data1 and data2 are layers. +````@ansi dataframe +DimStack(df, (X,Y,:category)) +```` + +You can also pass in the actual dimensions. +````@ansi dataframe +DimStack(df, dims(st)) +```` + +Pass in a tuple of column names to control which columns are read. +````@ansi dataframe +DimStack(df, dims(st); name = (:data2,)) +```` + +::: \ No newline at end of file From 056c1e80ca1533490cb9cb15992bd86c173bd150 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Sat, 5 Jul 2025 12:49:11 +0200 Subject: [PATCH 48/51] use Tables.columnnames instead of keys --- src/stack/stack.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 883df4e8c..9ff23e9f3 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -562,7 +562,7 @@ function DimStack(data, dims::Tuple; kw... ) if Tables.istable(data) table = Tables.columns(data) - all(map(d -> Dimensions.name(d) in keys(table), dims)) || throw(ArgumentError( + all(map(d -> Dimensions.name(d) in Tables.columnnames(table), dims)) || throw(ArgumentError( "All dimensions in dims must be in the table columns." )) dims = guess_dims(table, dims; kw...) From 74578a5de44f97e867fea842da99fd476b7da11a Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Sat, 5 Jul 2025 12:54:45 +0200 Subject: [PATCH 49/51] make DimArray work with all tables that are abstractarrays --- src/array/array.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index 6eb1e0b11..0b9d95bd7 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -530,11 +530,11 @@ function DimArray(table; kw...) 
# Use default dimension return dimarray_from_table(DimArray, table, guess_dims(table; kw...); kw...) end -# Special-case NamedTuple tables -function DimArray(data::AbstractVector{<:NamedTuple{K}}, dims::Tuple; +# Special-case for AbstractVectors - these might be tables +function DimArray(data::AbstractVector, dims::Tuple; refdims=(), name=NoName(), metadata=NoMetadata(), kw... -) where K - if all(map(d -> Dimensions.name(d) in K, dims)) +) + if Tables.istable(data) && all(map(d -> Dimensions.name(d) in Tables.schema(data).names, dims)) table = Tables.columns(data) dims = guess_dims(table, dims; kw...) return dimarray_from_table(DimArray, table, dims; refdims, name, metadata, kw...) From e32037f799b33e323d90aee43395a8cacf177db5 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Sat, 5 Jul 2025 13:18:56 +0200 Subject: [PATCH 50/51] do not treat dimvectors as tables --- src/array/array.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/array/array.jl b/src/array/array.jl index 0b9d95bd7..f160834ef 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -534,7 +534,8 @@ end function DimArray(data::AbstractVector, dims::Tuple; refdims=(), name=NoName(), metadata=NoMetadata(), kw... ) - if Tables.istable(data) && all(map(d -> Dimensions.name(d) in Tables.schema(data).names, dims)) + if !(data isa AbstractBasicDimArray) && Tables.istable(data) && + all(map(d -> Dimensions.name(d) in Tables.schema(data).names, dims)) table = Tables.columns(data) dims = guess_dims(table, dims; kw...) return dimarray_from_table(DimArray, table, dims; refdims, name, metadata, kw...) 
From a9ebc202e0efbd3dbcfa20af4fdc5a41614be5bd Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Sat, 5 Jul 2025 13:19:05 +0200 Subject: [PATCH 51/51] simplify get_column --- src/table_ops.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index baf45340a..38ad85dc9 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -78,9 +78,7 @@ guess_dims(table, dims::Tuple; precision=6, kw...) = map(dim -> _guess_dims(get_column(table, name(dim)), dim, precision), dims) #Retrieve the coordinate data stored in the column specified by `dim`. -get_column(table, x::Type{<:Dimension}) = Tables.getcolumn(table, name(x)) -get_column(table, x::Dimension) = Tables.getcolumn(table, name(x)) -get_column(table, x::Symbol) = Tables.getcolumn(table, x) +get_column(table, x) = Tables.getcolumn(table, name(x)) #Return the names of all columns that don't match the dimensions given by `dims`.