diff --git a/docs/src/tables.md b/docs/src/tables.md index d872ccce8..498f242c7 100644 --- a/docs/src/tables.md +++ b/docs/src/tables.md @@ -2,12 +2,22 @@ [Tables.jl](https://github.com/JuliaData/Tables.jl) provides an ecosystem-wide interface to tabular data in Julia, ensuring interoperability with [DataFrames.jl](https://dataframes.juliadata.org/stable/), [CSV.jl](https://csv.juliadata.org/stable/), and hundreds of other packages that implement the standard. +## Dimensional data are tables DimensionalData.jl implements the Tables.jl interface for `AbstractDimArray` and `AbstractDimStack`. `DimStack` layers are unrolled so they are all the same size, and dimensions loop to match the length of the largest layer. Columns are given the [`name`](@ref) of the array or stack layer, and the result of `DD.name(dimension)` for `Dimension` columns. -Looping of dimensions and stack layers is done _lazily_, -and does not allocate unless collected. +Looping of dimensions and stack layers is done _lazily_, and does not allocate unless collected. + +## Materializing tables to DimArray or DimStack +`DimArray` and `DimStack` have fallback methods to materialize any `Tables.jl`-compatible table. + +By default, they treat columns such as X, Y, Z, and Band as dimensions, and other columns as data. +Pass a `name` keyword argument to determine which column(s) are used as data. + +You have full control over which columns are dimensions - and what those dimensions look like exactly. If you pass a `Tuple` of `Symbol` or dimension types (e.g. `X`) as the second argument, those columns are treated as dimensions. Passing a `Tuple` of dimensions preserves these dimensions - with values matched to the corresponding columns. + +Materializing tables works even if the table is not ordered, and can handle missing values. 
## Example @@ -89,3 +99,61 @@ using CSV CSV.write("dimstack.csv", st) readlines("dimstack.csv") ```` + +## Converting a DataFrame to a DimArray or DimStack + +The DataFrame we use will have 5 columns: X, Y, category, data1, and data2. + +````@ansi dataframe +df = DataFrame(st) +```` + +::: tabs + +== Create a `DimArray` + +Converting this DataFrame to a DimArray without other arguments will read the `category` column as data and ignore data1 and data2: + +````@ansi dataframe +DimArray(df) +```` + +Specify dimension names to ensure these get treated as dimensions. Now data1 is read in instead. +````@ansi dataframe +DimArray(df, (X,Y,:category)) +```` + +You can also pass in the actual dimensions. +````@ansi dataframe +DimArray(df, dims(st)) +```` + +Pass in a name argument to read in data2 instead. +````@ansi dataframe +DimArray(df, dims(st); name = :data2) +```` + +== Create a `DimStack` + +Converting the DataFrame to a `DimStack` will by default read category, data1, and data2 as layers. +````@ansi dataframe +DimStack(df) +```` + + +Specify dimension names to ensure these get treated as dimensions. Now data1 and data2 are layers. +````@ansi dataframe +DimStack(df, (X,Y,:category)) +```` + +You can also pass in the actual dimensions. +````@ansi dataframe +DimStack(df, dims(st)) +```` + +Pass in a tuple of column names to control which columns are read. 
+````@ansi dataframe +DimStack(df, dims(st); name = (:data2,)) +```` + +::: \ No newline at end of file diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 87bf6384f..c4f1ea4e0 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -92,6 +92,7 @@ const DD = DimensionalData # Common include("interface.jl") include("name.jl") +include("table_ops.jl") # Arrays include("array/array.jl") diff --git a/src/Dimensions/dimension.jl b/src/Dimensions/dimension.jl index f5e2b2866..61d08d815 100644 --- a/src/Dimensions/dimension.jl +++ b/src/Dimensions/dimension.jl @@ -178,6 +178,7 @@ lookup(dim::Union{DimType,Val{<:Dimension}}) = NoLookup() name(dim::Dimension) = name(typeof(dim)) name(dim::Val{D}) where D = name(D) name(dim::Type{D}) where D<:Dimension = nameof(D) +name(s::Symbol) = s label(x) = string(name(x)) diff --git a/src/array/array.jl b/src/array/array.jl index 38fbe991e..f160834ef 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -144,7 +144,8 @@ function Base.NamedTuple(A1::AbstractDimArray, As::AbstractDimArray...) end # undef constructor for all AbstractDimArray -(::Type{A})(x::UndefInitializer, dims::Dimension...; kw...) where {A<:AbstractDimArray{<:Any}} = A(x, dims; kw...) +(::Type{A})(x::UndefInitializer, dims::Dimension...; kw...) where {A<:AbstractDimArray{T}} where T = + A(x, dims; kw...) function (::Type{A})(x::UndefInitializer, dims::DimTuple; kw...) where {A<:AbstractDimArray{T}} where T basetypeof(A)(Array{T}(undef, size(dims)), dims; kw...) end @@ -410,13 +411,14 @@ moves dimensions to reference dimension `refdims` after reducing operations ## Arguments -- `data`: An `AbstractArray`. +- `data`: An `AbstractArray` or a table with coordinate columns corresponding to `dims`. - `gen`: A generator expression. Where source iterators are `Dimension`s the dim args or kw is not needed. - `dims`: A `Tuple` of `Dimension` - `name`: A string name for the array. Shows in plots and tables. - `refdims`: refence dimensions. 
Usually set programmatically to track past slices and reductions of dimension for labelling and reconstruction. - `metadata`: `Dict` or `Metadata` object, or `NoMetadata()` +- `selector`: The coordinate selector type to use when materializing from a table. Indexing can be done with all regular indices, or with [`Dimension`](@ref)s and/or [`Selector`](@ref)s. @@ -512,6 +514,57 @@ function DimArray(A::AbstractBasicDimArray; newdata = collect(data) DimArray(newdata, format(dims, newdata); refdims, name, metadata) end +# Tables +# Write a single column from a table with one or more coordinate columns to a DimArray +function DimArray(table, dims; kw...) + # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`data` must be an `AbstractArray` or satisfy the `Tables.jl` interface.")) + table = Tables.columnaccess(table) ? table : Tables.columns(table) + return dimarray_from_table(DimArray, table, guess_dims(table, dims; kw...); kw...) # kw (e.g. `precision`) also steer guess_dims +end +# Same as above, but guess dimension names from scratch +function DimArray(table; kw...) + # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) + table = Tables.columnaccess(table) ? table : Tables.columns(table) + # Use default dimension + return dimarray_from_table(DimArray, table, guess_dims(table; kw...); kw...) +end +# Special-case for AbstractVectors - these might be tables +function DimArray(data::AbstractVector, dims::Tuple; + refdims=(), name=NoName(), metadata=NoMetadata(), kw... +) + if !(data isa AbstractBasicDimArray) && Tables.istable(data) && + all(map(d -> Dimensions.name(d) in Tables.schema(data).names, dims)) + table = Tables.columns(data) + dims = guess_dims(table, dims; kw...) + return dimarray_from_table(DimArray, table, dims; refdims, name, metadata, kw...) 
+ else + return DimArray(data, format(dims, data), refdims, name, metadata) + end +end + +function dimarray_from_table(::Type{T}, table, dims; + name=NoName(), + selector=nothing, + precision=6, + missingval=missing, + kw... +) where T <: AbstractDimArray + # Determine row indices based on coordinate values + indices = coords_to_indices(table, dims; selector, atol=10.0^-precision) + + # Extract the data column corresponding to `name` (the first non-dimension column when no name is given) + col = name == NoName() ? data_col_names(table, dims) |> first : Symbol(name) + data = Tables.getcolumn(table, col) + + # Restore array data + array = restore_array(data, indices, dims, missingval) + + # Return DimArray + return T(array, dims, name=col; kw...) +end + """ DimArray(f::Function, dim::Dimension; [name]) @@ -520,7 +573,7 @@ Apply function `f` across the values of the dimension `dim` the given dimension. Optionally provide a name for the result. """ function DimArray(f::Function, dim::Dimension; name=Symbol(nameof(f), "(", name(dim), ")")) - DimArray(f.(val(dim)), (dim,); name) + DimArray(f.(val(dim)), (dim,); name) end DimArray(itr::Base.Generator; kwargs...) = rebuild(collect(itr); kwargs...) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 0e68a6395..655317fa4 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -30,6 +30,11 @@ const AbstractVectorDimStack = AbstractDimStack{K,T,1} where {K,T} const AbstractMatrixDimStack = AbstractDimStack{K,T,2} where {K,T} (::Type{T})(st::AbstractDimStack; kw...) where T<:AbstractDimArray = + dimarray_from_dimstack(T, st; kw...) +# For ambiguity +DimArray(st::AbstractDimStack; kw...) = dimarray_from_dimstack(DimArray, st; kw...) + +dimarray_from_dimstack(T, st; kw...) = T([st[D] for D in DimIndices(st)]; dims=dims(st), metadata=metadata(st), kw...) data(s::AbstractDimStack) = getfield(s, :data) @@ -101,7 +106,7 @@ and an existing stack. 
# Keywords -Keywords are simply the fields of the stack object: +Keywords are simply the common fields of an `AbstractDimStack` object: - `data` - `dims` @@ -109,6 +114,8 @@ Keywords are simply the fields of the stack object: - `metadata` - `layerdims` - `layermetadata` + +There is no promise that these keywords will be used in all cases. """ function rebuild_from_arrays( s::AbstractDimStack{Keys}, das::Tuple{Vararg{AbstractBasicDimArray}}; kw... @@ -340,6 +347,7 @@ end """ DimStack <: AbstractDimStack + DimStack(table, [dims]; kw...) DimStack(data::AbstractDimArray...; kw...) DimStack(data::Union{AbstractArray,Tuple,NamedTuple}, [dims::DimTuple]; kw...) DimStack(data::AbstractDimArray; layersfrom, kw...) @@ -512,7 +520,7 @@ function DimStack(das::NamedTuple{<:Any,<:Tuple{Vararg{AbstractDimArray}}}; end DimStack(data::Union{Tuple,AbstractArray,NamedTuple}, dim::Dimension; name=uniquekeys(data), kw...) = DimStack(NamedTuple{Tuple(name)}(data), (dim,); kw...) -DimStack(data::Union{Tuple,AbstractArray}, dims::Tuple; name=uniquekeys(data), kw...) = +DimStack(data::Union{Tuple,AbstractArray{<:AbstractArray}}, dims::Tuple; name=uniquekeys(data), kw...) = DimStack(NamedTuple{Tuple(name)}(data), dims; kw...) function DimStack(data::NamedTuple{K}, dims::Tuple; refdims=(), @@ -520,6 +528,9 @@ function DimStack(data::NamedTuple{K}, dims::Tuple; layermetadata=nothing, layerdims=nothing ) where K + if length(data) > 0 && Tables.istable(data) && all(d -> name(d) in keys(data), dims) + return dimstack_from_table(DimStack, data, dims; refdims, metadata) + end layerdims = if isnothing(layerdims) all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() map(_ -> basedims(dims), data) @@ -546,6 +557,53 @@ function DimStack(st::AbstractDimStack; DimStack(data, dims, refdims, layerdims, metadata, layermetadata) end +# Write each column from a table with one or more coordinate columns to a layer in a DimStack +function DimStack(data, dims::Tuple; kw... 
+) + if Tables.istable(data) + table = Tables.columns(data) + all(map(d -> Dimensions.name(d) in Tables.columnnames(table), dims)) || throw(ArgumentError( + "All dimensions in dims must be in the table columns." + )) + dims = guess_dims(table, dims; kw...) + return dimstack_from_table(DimStack, table, dims; kw...) + else + throw(ArgumentError( + """data must be a table with coordinate columns, an AbstractArray, + or a Tuple or NamedTuple of AbstractArrays""" + )) + + end +end +function DimStack(table; kw...) + if Tables.istable(table) + table = Tables.columns(table) + return dimstack_from_table(DimStack, table, guess_dims(table; kw...); kw...) + else + throw(ArgumentError( + """data must be a table with coordinate columns, an AbstractArray, + or a Tuple or NamedTuple of AbstractArrays""" + )) end +end + +function dimstack_from_table(::Type{T}, table, dims; + name=nothing, + selector=nothing, + precision=6, + missingval=missing, + kw... +) where T<:AbstractDimStack + table = Tables.columnaccess(table) ? table : Tables.columns(table) + data_cols = isnothing(name) ? data_col_names(table, dims) : name # all non-dimension columns unless `name` is given + dims = guess_dims(table, dims; precision) + indices = coords_to_indices(table, dims; selector, atol=10.0^-precision) # same tolerance as dimarray_from_table + layers = map(data_cols) do col + d = Tables.getcolumn(table, col) + restore_array(d, indices, dims, missingval) + end + return T(layers, dims; name = data_cols, kw...) 
+end + layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) ### Skipmissing on DimStacks @@ -573,4 +631,4 @@ Base.eltype(::Type{Base.SkipMissing{T}}) where {T<:AbstractDimStack{<:Any, NT}} _nonmissing_nt(NT) @generated _nonmissing_nt(NT::Type{<:NamedTuple{K,V}}) where {K,V} = - NamedTuple{K, Tuple{map(Base.nonmissingtype, V.parameters)...}} \ No newline at end of file + NamedTuple{K, Tuple{map(Base.nonmissingtype, V.parameters)...}} diff --git a/src/table_ops.jl b/src/table_ops.jl new file mode 100644 index 000000000..38ad85dc9 --- /dev/null +++ b/src/table_ops.jl @@ -0,0 +1,211 @@ +#= +Restore a dimensional array from its tabular representation. + +- `data`: An `AbstractVector` containing the flat data to be written to a `DimArray`. +- `indices`: An `AbstractVector` containing the dimensional indices corresponding to each element in `data`. +- `dims`: The dimensions of the destination `DimArray`. +- `missingval`: The value to write to cells of the destination that no element of `indices` points to. + +# Returns + +An array with the size specified by `dims` (allocated as a `DimArray`) containing the values of `data` placed at their `indices`. +=# +function restore_array(data::AbstractVector, indices::AbstractVector, dims::Tuple, missingval) + # Allocate Destination Array + dst = DimArray{eltype(data)}(undef, dims) + for (idx, d) in zip(indices, data) + dst[idx] = d + end + + if length(indices) !== length(dst) + # Handle Missing Rows: the index count differs from the cell count, so mask cells that were never written + _missingval = _cast_missing(data, missingval) + missing_rows = trues(dims) + for idx in indices # looping is faster than broadcasting + missing_rows[idx] = false + end + return ifelse.(missing_rows, _missingval, dst) + end + return dst +end + +#= + coords_to_indices(table, dims; [selector, atol]) + +Return the dimensional index of each row in `table` based on its associated coordinates. +Dimension columns are determined from the name of each dimension in `dims`. + +# Arguments + +- `table`: a table +- `dims`: A `Tuple` of `Dimension` corresponding to the source/destination array. 
+ +# Keywords + +- `selector`: The selector type to use. When `nothing`, it is chosen per dimension: sampled dimensions use `Contains()` for intervals, + `Near()` when ordered with non-integer values and `At()` otherwise; categorical dimensions use `At()`; any other dimension uses `Near()`. +- `atol`: The absolute tolerance to use with `At()`. This defaults to `1e-6`. +=# +coords_to_indices(table, dims::Tuple; selector=nothing, atol=1e-6) = + _coords_to_indices(table, dims, selector, atol) + +#= + guess_dims(table; kw...) + guess_dims(table, dims; precision=6) + +Guesses the dimensions of an array based on the provided tabular representation. + +# Arguments + +- `table`: a table. +The dimensions will be inferred from the corresponding coordinate columns in the table. + +- `dims`: One or more dimensions to be inferred. If no dimensions are specified, then `guess_dims` will default +to any available dimensions in the set `(:X, :Y, :Z, :Ti, :Band)`. Dimensions can be given as either a singular +value or as a `Pair` with both the dimensions and corresponding order. The order will be inferred from the data +when none is given. This should work for sorted coordinates, but will not be sufficient when the table's rows are +out of order. + +# Keywords + +- `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`). + +# Returns +A tuple containing the inferred dimensions from the table. +=# +guess_dims(table; kw...) = guess_dims(table, _dim_col_names(table); kw...) +guess_dims(table, dims::Tuple; precision=6, kw...) = + map(dim -> _guess_dims(get_column(table, name(dim)), dim, precision), dims) + +# Retrieve the coordinate data stored in the column named `name(x)`. +get_column(table, x) = Tables.getcolumn(table, name(x)) + + +# Return the names of all columns that don't match the dimensions given by `dims`. +function data_col_names(table, dims::Tuple) + dim_cols = name(dims) + return filter(x -> !(x in dim_cols), Tables.columnnames(table)) +end + +_guess_dims(coords::AbstractVector, dim::Type{<:Dimension}, args...) = 
+_guess_dims(coords::AbstractVector, dim::Pair, args...) = + _guess_dims(coords, first(dim), last(dim), args...) +function _guess_dims(coords::AbstractVector, dim::Symbol, ::Type{T}, precision::Int) where {T <: Order} + return _guess_dims(coords, dim, T(), precision) +end +function _guess_dims(coords::AbstractVector, dim::Symbol, precision::Int) + dim_vals = _dim_vals(coords, dim, precision) + return format(Dim{dim}(dim_vals)) +end +function _guess_dims(coords::AbstractVector, dim::Type{<:Dimension}, precision::Int) + dim_vals = _dim_vals(coords, dim, precision) + return format(dim(dim_vals)) +end +function _guess_dims(coords::AbstractVector, dim::Dimension, precision::Int) + newl = _guess_dims(coords, lookup(dim), precision) + return format(rebuild(dim, newl)) +end +function _guess_dims(coords::AbstractVector, l::Lookup, precision::Int) + dim_vals = _dim_vals(coords, l, precision) + return rebuild(l; data = dim_vals) +end +# lookup(dim) could just return a vector - then we keep those values +_guess_dims(coords::AbstractVector, l::AbstractVector, precision::Int) = l + +# Extract coordinate columns from table +function _dim_cols(table, dims::Tuple) + dim_cols = name(dims) + return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) +end + +# Extract dimension column names from the given table +_dim_col_names(table) = filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)) +_dim_col_names(table, dims::Tuple) = map(col -> Tables.getcolumn(table, col), name(dims)) + +_coords_to_indices(table, dims::Tuple, sel, atol) = + _coords_to_indices(_dim_cols(table, dims), dims, sel, atol) +# Determine the ordinality of a set of coordinates +function _coords_to_indices(coords::Tuple, dims::Tuple, sel, atol) + map(zip(coords...)) do coords + map(coords, dims) do c, d + _coords_to_indices(c, d, sel, atol) + end + end +end +_coords_to_indices(coords::NamedTuple, dims::Tuple, sel, atol) = _coords_to_indices(map(x -> coords[x], name(dims)), dims, sel, 
atol) +# implement some default selectors +_coords_to_indices(coord, dim::Dimension, sel::Nothing, atol) = + _coords_to_indices(coord, dim, _default_selector(dim), atol) + +# get indices of the coordinates +_coords_to_indices(coord, dim::Dimension, sel::Selector, atol) = + return rebuild(dim, selectindices(dim, rebuild(sel, coord))) +# get indices of the coordinates +_coords_to_indices(coord, dim::Dimension, sel::At, atol) = + return rebuild(dim, selectindices(dim, rebuild(sel; val = coord, atol))) + +function _default_selector(dim::Dimension{<:AbstractSampled}) + if sampling(dim) isa Intervals + Contains() + elseif isordered(dim) && !(eltype(dim) <: Integer) + Near() + else + At() + end +end +_default_selector(dim::Dimension{<:AbstractCategorical}) = At() +_default_selector(dim::Dimension) = Near() + +# Extract dimension value from the given vector of coordinates +function _dim_vals(coords::AbstractVector, dim, precision::Int) + vals = _unique_vals(coords, precision) + return _maybe_as_range(vals, precision) +end +function _dim_vals(coords::AbstractVector, l::Lookup, precision::Int) + val(l) isa AutoValues || return val(l) # do we want to have some kind of check that the values match? + vals = _unique_vals(coords, precision) + _maybe_order!(vals, order(l)) + return _maybe_as_range(vals, precision) +end +_dim_vals(coords::AbstractVector, l::AbstractVector, precision::Int) = l # same comment as above? 
+ +_maybe_order!(A::AbstractVector, ::Order) = A +_maybe_order!(A::AbstractVector, ::ForwardOrdered) = sort!(A) +_maybe_order!(A::AbstractVector, ::ReverseOrdered) = sort!(A, rev=true) + +# Extract all unique coordinates from the given vector +_unique_vals(coords::AbstractVector, ::Int) = unique(coords) +_unique_vals(coords::AbstractVector{<:Real}, precision::Int) = round.(coords, digits=precision) |> unique +_unique_vals(coords::AbstractVector{<:Integer}, ::Int) = unique(coords) + +# Convert regularly spaced coordinates to a range; anything else is returned unchanged +_maybe_as_range(A::AbstractVector, precision) = A # for non-numeric types +function _maybe_as_range(A::AbstractVector{<:Real}, precision::Int) + A_r = range(first(A), last(A), length(A)) + atol = 10.0^(-precision) + return all(i -> isapprox(A_r[i], A[i]; atol), eachindex(A)) ? A_r : A +end +function _maybe_as_range(A::AbstractVector{<:Integer}, precision::Int) + length(A) > 1 || return A # fewer than two values: there is no step to infer + step = A[begin+1] - A[begin] + for idx in Iterators.drop(eachindex(A), 1) + A[idx] - A[idx-1] == step || return A + end + return first(A):step:last(A) +end +function _maybe_as_range(A::AbstractVector{<:Dates.AbstractTime}, precision::Int) + steps = (@view A[2:end]) .- (@view A[1:end-1]) + span = isempty(steps) ? nothing : argmin(abs, steps) # no steps when A has fewer than two values + isregular = !isnothing(span) && all(isinteger, round.(steps ./ span, digits=precision)) + return isregular ? 
range(first(A), last(A), length(A)) : A +end + +_cast_missing(::AbstractArray, missingval::Missing) = missing +function _cast_missing(::AbstractArray{T}, missingval) where {T} + try + return convert(T, missingval) + catch e + return missingval + end +end diff --git a/src/tree/tree.jl b/src/tree/tree.jl index 33c6cdc03..61c9ae6c5 100644 --- a/src/tree/tree.jl +++ b/src/tree/tree.jl @@ -58,7 +58,12 @@ layerdims(dt::AbstractDimTree, key::Symbol) = layerdims(dt)[key] layers(dt::AbstractDimTree) = DataDict((pn => dt[pn] for pn in keys(dt))) # DimStack constructors on DimTree -function (::Type{T})(dt::AbstractDimTree; keep=nothing) where {T<:AbstractDimStack} +# If this method has ambiguities, define it for the DimStack type and call dimstack_from_tree +(::Type{T})(dt::AbstractDimTree; kw...) where {T<:AbstractDimStack} = + dimstack_from_tree(T, dt; kw...) +DimStack(dt::AbstractDimTree; kw...) = dimstack_from_tree(DimStack, dt; kw...) + +function dimstack_from_tree(T, dt; keep=nothing) if isnothing(keep) pruned = DD.prune(dt; keep) T(pruned[Tuple(keys(pruned))]) @@ -430,4 +435,4 @@ end layerdims(layers::AbstractArray{<:Pair}) = TupleDict(map(((k, v),) -> k => basedims(v), layers)) layermetadata(layers::AbstractArray{<:Pair}) = - DataDict(map(((k, v),) -> k => metadata(v), layers)) \ No newline at end of file + DataDict(map(((k, v),) -> k => metadata(v), layers)) diff --git a/test/stack.jl b/test/stack.jl index 9bf9bde40..0a1ed5045 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -34,9 +34,8 @@ mixed = DimStack(da1, da2, da4) DimStack((da1[:, 1], da2[:, 1], da3[:, 1]); name=(:one, :two, :three)) == DimStack(da1[:, 1], da2[:, 1], da3[:, 1]; name=(:one, :two, :three)) == DimStack(parent.([da1[:, 1], da2[:, 1], da3[:, 1]]), dimz[1]; name=(:one, :two, :three)) == s[:, 1] - @test dims(DimStack()) == dims(DimStack(NamedTuple())) == - dims(DimStack(())) == dims(DimStack(DimArray[])) == - dims(DimStack((), ())) == dims(DimStack(Array[], ())) == () + @test dims(DimStack()) == 
dims(DimStack(())) == dims(DimStack(DimArray[])) == + dims(DimStack(NamedTuple())) == dims(DimStack((), ())) == dims(DimStack(Array[], ())) == () @test DimStack([A, 2A, 3A], (Z(), Ti()); name=(:one, :two, :three), layerdims=[(Z(), Ti()), (Z(), Ti()), (Z(), Ti())]) == DimStack((A, 2A, 3A), (Z(), Ti()); name=(:one, :two, :three), layerdims=(one=(Z(), Ti()), two=(Z(), Ti()), three=(Z(), Ti()))) == DimStack((one=A, two=2A, three=3A), (Z(), Ti()); layerdims=[(Z(), Ti()), (Z(), Ti()), (Z(), Ti())]) == @@ -412,4 +411,4 @@ end cs2 = collect(skipmissing(s2)) @test all(getindex.(cs2, :two) .== 1) @test getindex.(cs2, :one) == da1[X=2] -end \ No newline at end of file +end diff --git a/test/tables.jl b/test/tables.jl index 26466fb4f..3b7a47994 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -2,6 +2,7 @@ using DataFrames using Dates using DimensionalData using IteratorInterfaceExtensions +using Random using TableTraits using Tables using Test @@ -161,6 +162,107 @@ end @test Tables.columnnames(t4) == (:band, :geometry, :value) end +@testset "Materialize from table" begin + a = DimArray(rand(UInt8, 100, 100), (X(100:-1:1), Y(-250:5:249))) + b = DimArray(rand(Float32, 100, 100), (X(100:-1:1), Y(-250:5:249))) + c = DimArray(rand(Float64, 100, 100), (X(100:-1:1), Y(-250:5:249))) + ds = DimStack((a=a, b=b, c=c)) + t = DataFrame(ds) + t1 = Random.shuffle(t) + t2 = filter(r -> r.Y != -250, t) + t3 = copy(t1) + t3.X .+= rand(nrow(t1)) .* 1e-7 # add some random noise to check if precision works + + tabletypes = (Tables.rowtable, Tables.columntable, DataFrame) + + for type in tabletypes + t = type(t) + t1 = type(t1) + t2 = type(t2) + t3 = type(t3) + @testset "All dimensions passed (using $type)" begin + # Restore DimArray from shuffled table + for table = (t1, t3) + @test all(DimArray(table, dims(ds)) .== a) + @test all(DimArray(table, dims(ds), name="a") .== a) + @test all(DimArray(table, dims(ds), name="b") .== b) + @test all(DimArray(table, dims(ds), name="c") .== c) + end + + # 
Restore DimArray from table with missing rows + @test all(DimArray(t2, dims(ds), name="a")[Y(2:100)] .== a[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="b")[Y(2:100)] .== b[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="c")[Y(2:100)] .== c[Y(2:100)]) + @test DimArray(t2, dims(ds), name="a")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="b")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="c")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="a")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="b")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="c")[Y(2:100)] .|> ismissing .|> (!) |> all + + # Restore DimStack from shuffled table + restored_stack = DimStack(t1, dims(ds)) + @test all(restored_stack.a .== ds.a) + @test all(restored_stack.b .== ds.b) + @test all(restored_stack.c .== ds.c) + + # Restore DimStack from table with missing rows + restored_stack = DimStack(t2, dims(ds)) + @test all(restored_stack.a[Y(2:100)] .== ds.a[Y(2:100)]) + @test all(restored_stack.b[Y(2:100)] .== ds.b[Y(2:100)]) + @test all(restored_stack.c[Y(2:100)] .== ds.c[Y(2:100)]) + @test restored_stack.a[Y(1)] .|> ismissing |> all + @test restored_stack.b[Y(1)] .|> ismissing |> all + @test restored_stack.c[Y(1)] .|> ismissing |> all + @test restored_stack.a[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.b[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.c[Y(2:100)] .|> ismissing .|> (!) 
|> all + end + + @testset "Dimensions automatically detected (using $type)" begin + da3 = DimArray(t) + # Awkward test, see https://github.com/rafaqz/DimensionalData.jl/issues/953 + # If Dim{:X} == X then we can just test for equality + @test lookup(dims(da3, :X)) == lookup(dims(a, X)) + @test lookup(dims(da3, :Y)) == lookup(dims(a, Y)) + @test parent(da3) == parent(a) + + for table in (t1, t3) + da = DimArray(table) + @test parent(da[X = At(100:-1:1), Y = At(-250:5:249)]) == parent(a) + ds_ = DimStack(table) + @test keys(ds_) == (:a, :b, :c) + @test parent(ds_.a[X = At(100:-1:1), Y = At(-250:5:249)]) == parent(a) + + end + end + + @testset "Dimensions partially specified (using $type)" begin + for table in (t1, t3) + # setting the order returns ordered dimensions + da = DimArray(table, (X(Sampled(order = ReverseOrdered())), Y(Sampled(order=ForwardOrdered())))) + @test dims(da, X) == dims(a, X) + @test dims(da, Y) == dims(a, Y) + end + # passing in dimension types works + @test DimArray(t, (X, Y)) == a + @test parent(DimArray(t, (:X, Y))) == parent(a) + @test parent(DimArray(t, (:X, :Y))) == parent(a) + # passing in dimensions works for unconventional dimension names + A = rand(dimz, name = :a) + table = type(A) + @test DimArray(table, (X, Y(Sampled(span = Irregular())), :test)) == A + # Specifying dimensions types works even if it's illogical. + dat = DimArray(t, (X(Sampled(span = Irregular(), order = Unordered())), Y(Categorical()))) + x, y = dims(dat) + @test !isregular(x) + @test !isordered(x) + @test iscategorical(y) + @test isordered(y) # this is automatically detected + end + end +end + @testset "DimTable preservedims" begin x, y, t = X(1.0:32.0), Y(1.0:10.0), Ti(DateTime.([2001, 2002, 2003])) st = DimStack([rand(x, y, t; name) for name in [:a, :b, :c]])