From 5e4fb7248b8ffc2659e5342dfe01d0de6b0c1476 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Sun, 16 Dec 2018 13:29:12 +0000 Subject: [PATCH 01/21] wip --- REQUIRE | 3 +- src/columns.jl | 266 +++++++++---------------------------------------- 2 files changed, 51 insertions(+), 218 deletions(-) diff --git a/REQUIRE b/REQUIRE index 8bf6de22..c61eff94 100644 --- a/REQUIRE +++ b/REQUIRE @@ -5,4 +5,5 @@ WeakRefStrings 0.4.4 TableTraits 0.3.0 TableTraitsUtils 0.2.0 IteratorInterfaceExtensions 0.1.0 -Tables \ No newline at end of file +Tables +StructArrays diff --git a/src/columns.jl b/src/columns.jl index 9b443671..a0336e61 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -1,47 +1,9 @@ -""" -Wrapper around a (named) tuple of Vectors that acts like a Vector of (named) tuples. - -# Fields: - -- `columns`: a (named) tuple of Vectors. Also `columns(x)` -""" -struct Columns{D<:Union{Tup, Pair}, C<:Union{Tup, Pair}} <: AbstractVector{D} - columns::C - - function Columns{D,C}(c) where {D<:Tup,C<:Tup} - if !isempty(c) - n = length(c[1]) - for i = 2:length(c) - length(c[i]) == n || error("all columns must have same length") - end - end - new{D,C}(c) - end - - function Columns{D,C}(c::Pair) where {D<:Pair,C<:Pair{<:AbstractVector, <:AbstractVector}} - length(c.first) == length(c.second) || error("all columns must have same length") - new{D,C}(c) - end -end - -function Columns(cols::AbstractVector...; names::Union{Vector,Tuple{Vararg{Any}},Nothing}=nothing) - if isa(names, Nothing) || any(x->!(x isa Symbol), names) - Columns{eltypes(typeof(cols)),typeof(cols)}(cols) - else - dt = NamedTuple{(names...,), Tuple{map(eltype, cols)...}} - ct = NamedTuple{(names...,), Tuple{map(typeof, cols)...}} - Columns{dt,ct}(ct((cols...,))) - end -end - -function Columns(; kws...) - Columns(values(kws)..., names=collect(keys(kws))) -end - -Columns(c::Union{Tup, Pair}) = Columns{eltypes(typeof(c)),typeof(c)}(c) +using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray +# to get rid of eventually +const Columns = StructVector # There is a StackOverflow bug in this case in Base.unaliascopy -Base.copy(c::Columns{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c +Base.copy(c::StructVector{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c # IndexedTable-like API @@ -69,8 +31,8 @@ function colnames end Base.@pure colnames(t::AbstractVector) = (1,) columns(v::AbstractVector) = v -Base.@pure colnames(t::Columns) = fieldnames(eltype(t)) -Base.@pure colnames(t::Columns{<:Pair, <:Pair}) = colnames(t.columns.first) => colnames(t.columns.second) +Base.@pure colnames(t::StructVector) = fieldnames(eltype(t)) +Base.@pure colnames(t::StructVector{<:Pair, <:Pair}) = colnames(t.first) => colnames(t.second) """ columns(itr, select::Selection = All()) @@ -93,16 +55,9 @@ available selection options and syntax. """ function columns end -columns(c::Columns) = c.columns - -# Array-like API - -eltype(::Type{Columns{D,C}}) where {D,C} = D -function length(c::Columns) - isempty(c.columns) ? 0 : length(c.columns[1]) -end -length(c::Columns{<:Pair, <:Pair}) = length(c.columns.first) -ndims(c::Columns) = 1 +columns(c::StructVector{<:Tuple}) = Tuple(fieldarrays(c)) +columns(c::StructVector{<:NamedTuple}) = fieldarrays(c) +columns(c::StructVector{<:Pair}) = c.first => c.second """ ncols(itr) @@ -115,149 +70,19 @@ Returns the number of columns in `itr`. ncols(rows(([1,2,3],[4,5,6]))) == 2 """ function ncols end -ncols(c::Columns) = fieldcount(typeof(c.columns)) -ncols(c::Columns{<:Pair, <:Pair}) = ncols(c.columns.first) => ncols(c.columns.second) +ncols(c::StructVector) = fieldcount(fieldarrays(c)) +ncols(c::StructVector{<:Pair}) = ncols(c.first) => ncols(c.second) ncols(c::AbstractArray) = 1 -size(c::Columns) = (length(c),) -Base.IndexStyle(::Type{<:Columns}) = IndexLinear() -summary(c::Columns{D}) where {D<:Tuple} = "$(length(c))-element Columns{$D}" - -empty!(c::Columns) = (foreach(empty!, c.columns); c) -empty!(c::Columns{<:Pair, <:Pair}) = (foreach(empty!, c.columns.first.columns); foreach(empty!, c.columns.second.columns); c) - -function similar(c::Columns{D,C}) where {D,C} - cols = _map(similar, c.columns) - Columns{D,typeof(cols)}(cols) -end - -function similar(c::Columns{D,C}, n::Integer) where {D,C} - cols = _map(a->similar(a,n), c.columns) - Columns{D,typeof(cols)}(cols) -end - -function Base.similar(::Type{T}, n::Int)::T where {T<:Columns} - T_cols = T.parameters[2] - if T_cols <: Pair - return Columns(similar(T_cols.parameters[1], n) => similar(T_cols.parameters[2], n)) - end - f = T_cols <: Tuple ? tuple : T_cols∘tuple - T(f(map(t->similar(t, n), fieldtypes(T_cols))...)) -end - -function convert(::Type{Columns}, x::AbstractArray{<:NTuple{N,Any}}) where N - eltypes = (eltype(x).parameters...,) - copyto!(Columns(map(t->Vector{t}(undef, length(x)), eltypes)), x) -end - -function convert(::Type{Columns}, x::AbstractArray{<:NamedTuple{names, typs}}) where {names,typs} - eltypes = typs.parameters - copyto!(Columns(map(t->Vector{t}(undef, length(x)), eltypes)..., names=fieldnames(eltype(x))), x) -end - +summary(c::StructVector{D}) where {D<:Tuple} = "$(length(c))-element StructVector{$D}" -getindex(c::Columns{D}, i::Integer) where {D<:Tuple} = ith_all(i, c.columns) -getindex(c::Columns{D}, i::Integer) where {D<:NamedTuple} = D(ith_all(i, c.columns)) -getindex(c::Columns{D}, i::Integer) where {D<:Pair} = getindex(c.columns.first, i) => getindex(c.columns.second, i) +_sizehint!(c::StructVector, n::Integer) = (foreachfield(x->_sizehint!(x,n), c); c) -getindex(c::Columns, p::AbstractVector) = Columns(_map(c->c[p], c.columns)) - -view(c::Columns, I) = Columns(_map(a->view(a, I), c.columns)) - -@inline setindex!(I::Columns, r::Union{Tup, Pair}, i::Integer) = (foreach((c,v)->(c[i]=v), I.columns, r); I) - -@inline push!(I::Columns, r::Union{Tup, Pair}) = (foreach(push!, I.columns, r); I) - -append!(I::Columns, J::Columns) = (foreach(append!, I.columns, J.columns); I) - -copyto!(I::Columns, J::Columns) = (foreach(copyto!, I.columns, J.columns); I) - -resize!(I::Columns, n::Int) = (foreach(c->resize!(c,n), I.columns); I) - -_sizehint!(c::Columns, n::Integer) = (foreach(c->_sizehint!(c,n), c.columns); c) - -==(x::Columns, y::Columns) = x.columns == y.columns - -function _strip_pair(c::Columns{<:Pair}) - f, s = map(columns, c.columns) +function _strip_pair(c::StructVector{<:Pair}) + f, s = map(columns, fieldarrays(c)) (f isa AbstractVector) && (f = (f,)) (s isa AbstractVector) && (s = (s,)) - Columns(f..., s...) -end - -function sortperm(c::Columns) - cols = c.columns - x = cols[1] - if (eltype(x) <: AbstractString && !(x isa PooledArray)) || length(cols) > 1 - pa = PooledArray(compact_mem(x)) - p = sortperm_fast(pa) - else - p = sortperm_fast(x) - end - if length(cols) > 1 - y = cols[2] - refine_perm!(p, cols, 1, compact_mem(x), compact_mem(y), 1, length(x)) - end - return p -end - -sortperm(c::Columns{<:Pair}) = sortperm(_strip_pair(c)) - -issorted(c::Columns) = issorted(1:length(c), lt=(x,y)->rowless(c, x, y)) -issorted(c::Columns{<:Pair}) = issorted(_strip_pair(c)) - -# assuming x[p] is sorted, sort by remaining columns where x[p] is constant -function refine_perm!(p, cols, c, x, y, lo, hi) - temp = similar(p, 0) - order = Base.Order.By(j->(@inbounds k=y[j]; k)) - nc = length(cols) - i = lo - while i < hi - i1 = i+1 - @inbounds while i1 <= hi && roweq(x, p[i1], p[i]) - i1 += 1 - end - i1 -= 1 - if i1 > i - sort_sub_by!(p, i, i1, y, order, temp) - if c < nc-1 - z = cols[c+2] - refine_perm!(p, cols, c+1, compact_mem(y), compact_mem(z), i, i1) - end - end - i = i1+1 - end -end - -function permute!(c::Columns, p::AbstractVector) - for v in c.columns - if isa(v, PooledArrays.PooledArray) || isa(v, StringArray{String}) - permute!(v, p) - else - copyto!(v, v[p]) - end - end - return c -end -permute!(c::Columns{<:Pair}, p::AbstractVector) = (permute!(c.columns.first, p); permute!(c.columns.second, p); c) -sort!(c::Columns) = permute!(c, sortperm(c)) -sort(c::Columns) = c[sortperm(c)] - -function Base.vcat(c::Columns, cs::Columns...) - fns = map(fieldnames∘typeof, (map(x->x.columns, (c, cs...)))) - f1 = fns[1] - for f2 in fns[2:end] - if f1 != f2 - errfields = join(map(string, fns), ", ", " and ") - throw(ArgumentError("Cannot concatenate columns with fields $errfields")) - end - end - Columns(map(vcat, map(x->x.columns, (c,cs...))...)) -end - -function Base.vcat(c::Columns{<:Pair}, cs::Columns{<:Pair}...) - Columns(vcat(c.columns.first, (x.columns.first for x in cs)...) => - vcat(c.columns.second, (x.columns.second for x in cs)...)) + StructVector((f..., s...)) end # fused indexing operations @@ -273,18 +98,18 @@ end # row operations -copyrow!(I::Columns, i, src) = foreach(c->copyelt!(c, i, src), I.columns) -copyrow!(I::Columns, i, src::Columns, j) = foreach((c1,c2)->copyelt!(c1, i, c2, j), I.columns, src.columns) +copyrow!(I::StructVector, i, src) = foreachfield(c->copyelt!(c, i, src), I) +copyrow!(I::StructVector, i, src::StructVector, j) = foreachfield((c1,c2)->copyelt!(c1, i, c2, j), I, src) copyrow!(I::AbstractArray, i, src::AbstractArray, j) = (@inbounds I[i] = src[j]) -pushrow!(to::Columns, from::Columns, i) = foreach((a,b)->push!(a, b[i]), to.columns, from.columns) +pushrow!(to::StructVector, from::StructVector, i) = foreachfield((a,b)->push!(a, b[i]), to, from) pushrow!(to::AbstractArray, from::AbstractArray, i) = push!(to, from[i]) -@generated function rowless(c::Columns{D,C}, i, j) where {D,C} +@generated function rowless(c::StructVector{D,C}, i, j) where {D,C} N = fieldcount(C) - ex = :(cmpelts(getfield(c.columns,$N), i, j) < 0) + ex = :(cmpelts(getfield(fieldarrays(c),$N), i, j) < 0) for n in N-1:-1:1 ex = quote - let d = cmpelts(getfield(c.columns,$n), i, j) + let d = cmpelts(getfield(fieldarrays(c),$n), i, j) (d == 0) ? ($ex) : (d < 0) end end @@ -292,11 +117,11 @@ pushrow!(to::AbstractArray, from::AbstractArray, i) = push!(to, from[i]) ex end -@generated function roweq(c::Columns{D,C}, i, j) where {D,C} +@generated function roweq(c::StructVector{D,C}, i, j) where {D,C} N = fieldcount(C) - ex = :(cmpelts(getfield(c.columns,1), i, j) == 0) + ex = :(cmpelts(getfield(fieldarrays(c),1), i, j) == 0) for n in 2:N - ex = :(($ex) && (cmpelts(getfield(c.columns,$n), i, j)==0)) + ex = :(($ex) && (cmpelts(getfield(fieldarrays(c),$n), i, j)==0)) end ex end @@ -305,12 +130,12 @@ end # uses number of columns from `d`, assuming `c` has more or equal # dimensions, for broadcast joins. -@generated function rowcmp(c::Columns, i, d::Columns{D}, j) where D +@generated function rowcmp(c::StructVector, i, d::StructVector{D}, j) where D N = fieldcount(D) - ex = :(cmp(getfield(c.columns,$N)[i], getfield(d.columns,$N)[j])) + ex = :(cmp(getfield(fieldarrays(c),$N)[i], getfield(fieldarrays(d),$N)[j])) for n in N-1:-1:1 ex = quote - let k = cmp(getfield(c.columns,$n)[i], getfield(d.columns,$n)[j]) + let k = cmp(getfield(fieldarrays(c),$n)[i], getfield(fieldarrays(d),$n)[j]) (k == 0) ? ($ex) : k end end @@ -326,18 +151,18 @@ end # all columns are equal except left >= right in last column. # Could be generalized to some number of trailing columns, but I don't # know whether that has applications. -@generated function row_asof(c::Columns{D,C}, i, d::Columns{D,C}, j) where {D,C} +@generated function row_asof(c::StructVector{D,C}, i, d::StructVector{D,C}, j) where {D,C} N = length(C.parameters) if N == 1 - ex = :(!isless(getfield(c.columns,1)[i], getfield(d.columns,1)[j])) + ex = :(!isless(getfield(fieldarrays(c),1)[i], getfield(fieldarrays(d),1)[j])) else - ex = :(isequal(getfield(c.columns,1)[i], getfield(d.columns,1)[j])) + ex = :(isequal(getfield(fieldarrays(c),1)[i], getfield(fieldarrays(d),1)[j])) end for n in 2:N if N == n - ex = :(($ex) && !isless(getfield(c.columns,$n)[i], getfield(d.columns,$n)[j])) + ex = :(($ex) && !isless(getfield(fieldarrays(c),$n)[i], getfield(fieldarrays(d),$n)[j])) else - ex = :(($ex) && isequal(getfield(c.columns,$n)[i], getfield(d.columns,$n)[j])) + ex = :(($ex) && isequal(getfield(fieldarrays(c),$n)[i], getfield(fieldarrays(d),$n)[j])) end end ex @@ -357,11 +182,11 @@ elementwise. Collect output as `Columns` if `f` returns map_rows(i -> (exp = exp(i), log = log(i)), 1:5) """ function map_rows(f, iters...) - collect_columns(f(i...) for i in zip(iters...)) + collect_structarray(f(i...) for i in zip(iters...)) end # 1-arg case -map_rows(f, iter) = collect_columns(f(i) for i in iter) +map_rows(f, iter) = collect_structarray(f(i) for i in iter) ## Special selectors to simplify column selector @@ -496,8 +321,8 @@ end column(c, x) = columns(c)[colindex(c, x)] # optimized method -@inline function column(c::Columns, x::Union{Int, Symbol}) - getfield(c.columns, x) +@inline function column(c::StructVector, x::Union{Int, Symbol}) + getfield(fieldarrays(c), x) end column(t, a::AbstractArray) = a @@ -568,11 +393,11 @@ the [`select`](@ref) function for selection options and syntax. function rows end rows(x::AbstractVector) = x -rows(cols::Tup) = Columns(cols) +rows(cols::Tup) = StructVector(cols) rows(t, which...) = rows(columns(t, which...)) -_cols_tuple(xs::Columns) = columns(xs) +_cols_tuple(xs::StructVector) = columns(xs) _cols_tuple(xs::AbstractArray) = (xs,) concat_cols(xs, ys) = rows(concat_tup(_cols_tuple(xs), _cols_tuple(ys))) @@ -602,8 +427,15 @@ function ColDict(t; copy=nothing) ColDict(Int[], t, convert(Array{Any}, collect(cnames)), Any[columns(t)...], copy) end -function Base.getindex(d::ColDict{<:Columns}) - Columns(d.columns...; names=d.names) +function structvector_columnsnames(cols::AbstractVector...; names = Symbol[]) + if all(t -> isa(t, Symbol), names) && length(names) == length(cols) + StructVector(NamedTuple{Tuple(names)}(cols)) + else + StructVector(cols) + end +end +function Base.getindex(d::ColDict{<:StructVector}) + structvector_columnsnames(d.columns; names=d.names) end Base.getindex(d::ColDict, key) = rows(d[], key) @@ -934,4 +766,4 @@ end ### utils -compact_mem(x::Columns) = Columns(map(compact_mem, columns(x))) +compact_mem(x::StructVector) = StructVector(map(compact_mem, columns(x))) From c79b9735065c4ea83bbc14befc29949b8c78bfec Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Sun, 16 Dec 2018 13:54:33 +0000 Subject: [PATCH 02/21] temporarily remove extra column method --- src/tables.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tables.jl b/src/tables.jl index eedb264b..816aa767 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -1,7 +1,7 @@ #-----------------------------------------------------------------------# Columns const TableColumns = Columns{T} where {T<:NamedTuple} -Columns(x; kw...) = Columns(Tables.columntable(x); kw...) +# Columns(x; kw...) = Columns(Tables.columntable(x); kw...) Tables.istable(::Type{<:TableColumns}) = true Tables.materializer(c::TableColumns) = Columns From 1eac5fae70a8dc4c6a619a2adaf7b70e1908522a Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Sun, 16 Dec 2018 15:51:24 +0000 Subject: [PATCH 03/21] update arrayof --- src/IndexedTables.jl | 2 ++ src/columns.jl | 2 -- src/utils.jl | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/IndexedTables.jl b/src/IndexedTables.jl index 9cbee999..29ce44c6 100644 --- a/src/IndexedTables.jl +++ b/src/IndexedTables.jl @@ -14,6 +14,8 @@ import Base: tuple_type_cons, tuple_type_head, tuple_type_tail, in, convert +using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema + #-----------------------------------------------------------------------# exports export # macros diff --git a/src/columns.jl b/src/columns.jl index a0336e61..d434178e 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -1,5 +1,3 @@ -using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray - # to get rid of eventually const Columns = StructVector # There is a StackOverflow bug in this case in Base.unaliascopy diff --git a/src/utils.jl b/src/utils.jl index a2465847..08a22afe 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -168,18 +168,18 @@ Base.@pure function arrayof(S) if T == Union{} Vector{Union{}} elseif T<:Tuple - Columns{T, Tuple{map(arrayof, fieldtypes(T))...}} + StructVector{T, staticschema(Tuple{map(arrayof, fieldtypes(T))...})} elseif T<:NamedTuple if fieldcount(T) == 0 - Columns{NamedTuple{(), Tuple{}}, NamedTuple{(), Tuple{}}} + StructVector{NamedTuple{(), Tuple{}}, NamedTuple{(), Tuple{}}} else - Columns{T,NamedTuple{fieldnames(T), Tuple{map(arrayof, fieldtypes(T))...}}} + StructVector{T,NamedTuple{fieldnames(T), Tuple{map(arrayof, fieldtypes(T))...}}} end elseif (T<:Union{Missing,String,WeakRefString} && Missing<:T) || T<:Union{String, WeakRefString} StringArray{T, 1} elseif T<:Pair - Columns{T, Pair{map(arrayof, T.parameters)...}} + StructVector{T, NamedTuple{(:first, :second), Tuple{map(arrayof, T.parameters)...}}} else Vector{T} end From d41ed577985c994c07e035c155436f8bf1b159e4 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Sun, 16 Dec 2018 16:33:59 +0000 Subject: [PATCH 04/21] fix ncols --- src/columns.jl | 2 +- src/indexedtable.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/columns.jl b/src/columns.jl index d434178e..f1844f75 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -68,7 +68,7 @@ Returns the number of columns in `itr`. ncols(rows(([1,2,3],[4,5,6]))) == 2 """ function ncols end -ncols(c::StructVector) = fieldcount(fieldarrays(c)) +ncols(c::StructVector{T, C}) where {T, C} = fieldcount(C) ncols(c::StructVector{<:Pair}) = ncols(c.first) => ncols(c.second) ncols(c::AbstractArray) = 1 diff --git a/src/indexedtable.jl b/src/indexedtable.jl index dab871ce..0c7efede 100644 --- a/src/indexedtable.jl +++ b/src/indexedtable.jl @@ -384,12 +384,12 @@ keyword arguments will be forwarded to [`table`](@ref) constructor. # Example convert(IndexedTable, Columns(x=[1,2],y=[3,4]), Columns(z=[1,2]), presorted=true) """ -function convert(::Type{IndexedTable}, key, val; kwargs...) +function convert(::Type{IndexedTable}, key, val; pkey = (), kwargs...) cs = concat_cols(key, val) - table(cs, pkey=[1:ncols(key);]; kwargs...) + table(cs; pkey=Tuple(1:ncols(key)), kwargs...) end -convert(T::Type{IndexedTable}, c::Columns{<:Pair}; kwargs...) = convert(T, c.columns.first, c.columns.second; kwargs...) +convert(T::Type{IndexedTable}, c::Columns{<:Pair}; kwargs...) = convert(T, c.first, c.second; kwargs...) # showing global show_compact_when_wide = true From 828cee598d488f3e567b41fea99b56d827262993 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Sun, 16 Dec 2018 16:36:48 +0000 Subject: [PATCH 05/21] fix ndims --- src/ndsparse.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ndsparse.jl b/src/ndsparse.jl index 6109e431..cd0d4467 100644 --- a/src/ndsparse.jl +++ b/src/ndsparse.jl @@ -209,7 +209,7 @@ _convert(::Type{<:Tuple}, tup::Tuple) = tup _convert(::Type{T}, tup::Tuple) where {T<:NamedTuple} = T(tup) convertkey(t::NDSparse{V,K,I}, tup::Tuple) where {V,K,I} = _convert(eltype(I), tup) -ndims(t::NDSparse) = length(t.index.columns) +ndims(t::NDSparse) = length(fieldarrays(t.index)) length(t::NDSparse) = (flush!(t);length(t.index)) eltype(::Type{NDSparse{T,D,C,V}}) where {T,D,C,V} = T Base.keytype(::Type{NDSparse{T,D,C,V}}) where {T,D,C,V} = D From 46d9d7a27150bad6913c341be76bdb2915cb93a8 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Sun, 16 Dec 2018 16:48:56 +0000 Subject: [PATCH 06/21] fix stack unstack --- src/reshape.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reshape.jl b/src/reshape.jl index aa540c10..db014093 100644 --- a/src/reshape.jl +++ b/src/reshape.jl @@ -26,7 +26,7 @@ function stack(t::D, by = pkeynames(t); select = isa(t, NDSparse) ? valuenames(t labelcol = [label for i in 1:length(t) for label in labels] bycols = map(arg -> repeat(arg, inner = length(valuecols)), columns(t, by)) - convert(collectiontype(D), Columns(bycols), Columns(labelcol, valuecol, names = [variable, value])) + convert(collectiontype(D), StructVector(bycols), structvector_columnsnames(labelcol, valuecol, names = [variable, value])) end """ @@ -53,7 +53,7 @@ function unstack(t::D, by = pkeynames(t); variable = :variable, value = :value) end function unstack(::Type{D}, ::Type{T}, key, val, cols::AbstractVector{S}) where {D <:Dataset, T, S} - dest_val = Columns((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols) + dest_val = structvector_columnsnames((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols) for (i, el) in enumerate(val) for (k, v) in el ismissing(columns(dest_val, S(k))[i]) || error("Repeated values with same label are not allowed") From 43734214807f8e5a12f27c1c331f75146443754d Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Mon, 17 Dec 2018 18:09:42 +0000 Subject: [PATCH 07/21] fix more columns --- src/join.jl | 12 ++++++------ test/test_core.jl | 28 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/join.jl b/src/join.jl index 1ac36a2b..0f86b83b 100644 --- a/src/join.jl +++ b/src/join.jl @@ -342,7 +342,7 @@ function Base.join(f, left::Dataset, right::Dataset; lnulls = zeros(Bool, length(lout)) lnulls[lnull_idx] .= true lout = if lout isa Columns - Columns(map(lout.columns) do col + Columns(map(columns(lout)) do col v = convert(Vector{Union{Missing, eltype(col)}}, col) v[lnull_idx] .= missing v @@ -359,7 +359,7 @@ function Base.join(f, left::Dataset, right::Dataset; rnulls = zeros(Bool, length(rout)) rnulls[rnull_idx] .= true rout = if rout isa Columns - Columns(map(rout.columns) do col + Columns(map(columns(rout)) do col v = convert(Vector{Union{Missing, eltype(col)}}, col) v[rnull_idx] .= missing v @@ -512,7 +512,7 @@ function count_overlap(I::Columns{D}, J::Columns{D}) where D end function promoted_similar(x::Columns, y::Columns, n) - Columns(map((a,b)->promoted_similar(a, b, n), x.columns, y.columns)) + Columns(map((a,b)->promoted_similar(a, b, n), columns(x), columns(y))) end function promoted_similar(x::AbstractArray, y::AbstractArray, n) @@ -697,7 +697,7 @@ function find_corresponding(Ap, Bp) end function match_indices(A::NDSparse, B::NDSparse) - if isa(A.index.columns, NamedTuple) && isa(B.index.columns, NamedTuple) + if isa(columns(A.index), NamedTuple) && isa(columns(B.index), NamedTuple) Ap = colnames(A.index) Bp = colnames(B.index) else @@ -786,7 +786,7 @@ function _broadcast!(f::Function, A::NDSparse, B::NDSparse, C::NDSparse; dimmap= end common = filter(i->C_inds[i] > 0, 1:ndims(A)) C_common = C_inds[common] - B_common_cols = Columns(getsubfields(B.index.columns, common)) + B_common_cols = Columns(getsubfields(columns(B.index), common)) B_perm = sortperm(B_common_cols) if C_common == C_dims idx, iperm = _bcast_loop!(f, values(A), B, C, B_common_cols, B_perm) @@ -797,7 +797,7 @@ function _broadcast!(f::Function, A::NDSparse, B::NDSparse, C::NDSparse; dimmap= end else # TODO - #C_perm = sortperm(Columns(C.index.columns[[C_common...]])) + #C_perm = sortperm(Columns(columns(C.index)[[C_common...]])) error("dimensions of one argument to `broadcast` must be a subset of the dimensions of the other") end return A diff --git a/test/test_core.jl b/test/test_core.jl index e4c17936..b91ccf5d 100644 --- a/test/test_core.jl +++ b/test/test_core.jl @@ -11,8 +11,8 @@ @test map_rows(tuple, 1:3, ["a","b","c"]) == Columns([1,2,3], ["a","b","c"]) c = Columns(Columns((a=[1,2,3],)) => Columns((b=["a","b","c"],))) - @test c.columns.first == Columns((a=[1,2,3],)) - @test c.columns.second == Columns((b=["a","b","c"],)) + @test columns(c).first == Columns((a=[1,2,3],)) + @test columns(c).second == Columns((b=["a","b","c"],)) @test colnames(c) == ((:a,) => (:b,)) @test length(c) == 3 @test ncols(c) == (1 => 1) @@ -107,10 +107,10 @@ end @test broadcast(*, nd, ndv) == convert(NDSparse, S .* v) # test matching dimensions by name ndt0 = convert(NDSparse, sparse(S .* (v'))) - ndt = NDSparse(Columns(a=ndt0.index.columns[1], b=ndt0.index.columns[2]), ndt0.data, presorted=true) + ndt = NDSparse(Columns(a=columns(ndt0.index)[1], b=columns(ndt0.index)[2]), ndt0.data, presorted=true) @test broadcast(*, - NDSparse(Columns(a=nd.index.columns[1], b=nd.index.columns[2]), nd.data), - NDSparse(Columns(b=ndv.index.columns[1]), ndv.data)) == ndt + NDSparse(Columns(a=columns(nd.index)[1], b=columns(nd.index)[2]), nd.data), + NDSparse(Columns(b=columns(ndv.index)[1]), ndv.data)) == ndt let a = rand(10), b = rand(10), c = rand(10) @test NDSparse(a, b, c) == NDSparse(a, b, c) @@ -195,7 +195,7 @@ for a in (rand(2,2), rand(3,5)) end end -_colnames(x::NDSparse) = keys(x.index.columns) +_colnames(x::NDSparse) = keys(columns(x.index)) @test _colnames(NDSparse(ones(2),ones(2),ones(2),names=[:a,:b])) == (:a, :b) @test _colnames(NDSparse(Columns(x=ones(2),y=ones(2)), ones(2))) == (:x, :y) @@ -304,23 +304,23 @@ end t = table(cs) @test t.pkey == Int[] @test t.columns == [(1,2)] - @test column(t.columns,1) !== cs.columns[1] + @test column(t.columns,1) !== columns(cs)[1] t = table(cs, copy=false) - @test column(t.columns,1) === cs.columns[1] + @test column(t.columns,1) === columns(cs)[1] t = table(cs, copy=false, pkey=[1]) - @test column(t.columns,1) === cs.columns[1] + @test column(t.columns,1) === columns(cs)[1] cs = Columns([2, 1], [3,4]) t = table(cs, copy=false, pkey=[1]) @test t.pkey == Int[1] cs = Columns([2, 1], [3,4]) t = table(cs, copy=false, pkey=[1]) - @test column(t.columns,1) === cs.columns[1] + @test column(t.columns,1) === columns(cs)[1] @test t.pkey == Int[1] @test t.columns == [(1,4), (2,3)] cs = Columns(x=[2, 1], y=[3,4]) t = table(cs, copy=false, pkey=:x) - @test column(t.columns,1) === cs.columns.x + @test column(t.columns,1) === columns(cs).x @test t.pkey == Int[1] @test t.columns == [(x=1,y=4), (x=2,y=3)] @@ -614,7 +614,7 @@ end t2 = map(x->(x.x,x.x^2), t) @test isa(t2.data, Columns) - @test isa(t2.data.columns, Tuple{Vector{Int}, Vector{Int}}) + @test isa(columns(t2.data), Tuple{Vector{Int}, Vector{Int}}) t3 = map(x->ntuple(identity, x.x), t) @test isa(t3.data, Vector) @@ -1054,10 +1054,10 @@ end C = rand(3,3) nA = convert(NDSparse, A) nB = convert(NDSparse, B) - nB.index.columns[1][:] .+= 3 + columns(nB.index)[1][:] .+= 3 @test merge(nA,nB) == convert(NDSparse, vcat(A,B)) nC = convert(NDSparse, C) - nC.index.columns[1][:] .+= 6 + columns(nC.index)[1][:] .+= 6 @test merge(nA,nB,nC) == merge(nA,nC,nB) == convert(NDSparse, vcat(A,B,C)) merge!(nA,nB) @test nA == convert(NDSparse, vcat(A,B)) From 91b2c333703e190eef66d523ebf72c17452ab6bc Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Mon, 17 Dec 2018 18:39:32 +0000 Subject: [PATCH 08/21] use new StructVector method --- src/columns.jl | 11 ++--------- src/reshape.jl | 4 ++-- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/columns.jl b/src/columns.jl index f1844f75..1078cacc 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -30,7 +30,7 @@ Base.@pure colnames(t::AbstractVector) = (1,) columns(v::AbstractVector) = v Base.@pure colnames(t::StructVector) = fieldnames(eltype(t)) -Base.@pure colnames(t::StructVector{<:Pair, <:Pair}) = colnames(t.first) => colnames(t.second) +Base.@pure colnames(t::StructVector{<:Pair}) = colnames(t.first) => colnames(t.second) """ columns(itr, select::Selection = All()) @@ -425,15 +425,8 @@ function ColDict(t; copy=nothing) ColDict(Int[], t, convert(Array{Any}, collect(cnames)), Any[columns(t)...], copy) end -function structvector_columnsnames(cols::AbstractVector...; names = Symbol[]) - if all(t -> isa(t, Symbol), names) && length(names) == length(cols) - StructVector(NamedTuple{Tuple(names)}(cols)) - else - StructVector(cols) - end -end function Base.getindex(d::ColDict{<:StructVector}) - structvector_columnsnames(d.columns; names=d.names) + Columns(d.columns...; names=d.names) end Base.getindex(d::ColDict, key) = rows(d[], key) diff --git a/src/reshape.jl b/src/reshape.jl index db014093..7960c4aa 100644 --- a/src/reshape.jl +++ b/src/reshape.jl @@ -26,7 +26,7 @@ function stack(t::D, by = pkeynames(t); select = isa(t, NDSparse) ? valuenames(t labelcol = [label for i in 1:length(t) for label in labels] bycols = map(arg -> repeat(arg, inner = length(valuecols)), columns(t, by)) - convert(collectiontype(D), StructVector(bycols), structvector_columnsnames(labelcol, valuecol, names = [variable, value])) + convert(collectiontype(D), StructVector(bycols), StructVector(labelcol, valuecol, names = [variable, value])) end """ @@ -53,7 +53,7 @@ function unstack(t::D, by = pkeynames(t); variable = :variable, value = :value) end function unstack(::Type{D}, ::Type{T}, key, val, cols::AbstractVector{S}) where {D <:Dataset, T, S} - dest_val = structvector_columnsnames((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols) + dest_val = StructVector((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols) for (i, el) in enumerate(val) for (k, v) in el ismissing(columns(dest_val, S(k))[i]) || error("Repeated values with same label are not allowed") From 2b0d7d97f8506ad86c9758c1ff70f2ce93e07544 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Mon, 17 Dec 2018 19:23:58 +0000 Subject: [PATCH 09/21] fix more columns --- src/ndsparse.jl | 12 ++++++------ src/reduce.jl | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ndsparse.jl b/src/ndsparse.jl index cd0d4467..44752251 100644 --- a/src/ndsparse.jl +++ b/src/ndsparse.jl @@ -88,7 +88,7 @@ function ndsparse(::Val{:serial}, ks::Tup, vs::Union{Tup, AbstractVector}; elseif copy if agg !== nothing iter = GroupReduce(agg, I, d, Base.OneTo(length(I))) - I, d = collect_columns(iter).columns + I, d = collect_columns(iter) |> columns agg = nothing else I = Base.copy(I) @@ -117,7 +117,7 @@ function ndsparse(x::Columns, y::AbstractVector; kwargs...) end ndsparse(c::Columns{<:Pair}; kwargs...) = - convert(NDSparse, c.columns.first, c.columns.second; kwargs...) + convert(NDSparse, columns(c).first, columns(c).second; kwargs...) # backwards compat NDSparse(idx::Columns, data; kwargs...) = ndsparse(idx, data; kwargs...) @@ -267,7 +267,7 @@ function permutedims(t::NDSparse, p::AbstractVector) throw(ArgumentError("argument to permutedims must be a valid permutation")) end flush!(t) - NDSparse(Columns(t.index.columns[p]), t.data, copy=true) + NDSparse(Columns(columns(t.index)[p]), t.data, copy=true) end # showing @@ -312,7 +312,7 @@ function showmeta(io, t::NDSparse, cnames) end @noinline convert(::Type{NDSparse}, @nospecialize(ks), @nospecialize(vs); kwargs...) = ndsparse(ks, vs; kwargs...) -@noinline convert(T::Type{NDSparse}, c::Columns{<:Pair}; kwargs...) = convert(T, c.columns.first, c.columns.second; kwargs...) +@noinline convert(T::Type{NDSparse}, c::Columns{<:Pair}; kwargs...) = convert(T, columns(c).first, columns(c).second; kwargs...) # map and convert @@ -344,9 +344,9 @@ end # Given an NDSparse array with multiple data columns (its data vector is a `Columns` object), return a # new array with the specified subset of data columns. Data is shared with the original array. # """ -# columns(x::NDSparse, which...) = NDSparse(x.index, Columns(x.data.columns[[which...]]), presorted=true) +# columns(x::NDSparse, which...) = NDSparse(x.index, Columns(columns(x.data)[[which...]]), presorted=true) -#columns(x::NDSparse, which) = NDSparse(x.index, x.data.columns[which], presorted=true) +#columns(x::NDSparse, which) = NDSparse(x.index, columns(x.data)[which], presorted=true) #column(x::NDSparse, which) = columns(x, which) diff --git a/src/reduce.jl b/src/reduce.jl index d1675e6a..bfd303a4 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -342,7 +342,7 @@ function Base.reduce(f, x::NDSparse; kws...) if dims isa Symbol dims = [dims] end - keep = setdiff([1:ndims(x);], map(d->fieldindex(x.index.columns,d), dims)) + keep = setdiff([1:ndims(x);], map(d->fieldindex(columns(x.index),d), dims)) if isempty(keep) throw(ArgumentError("to remove all dimensions, use `reduce(f, A)`")) end @@ -363,11 +363,11 @@ Like `reduce`, except uses a function mapping a vector of values to a scalar ins of a 2-argument scalar function. """ function reducedim_vec(f, x::NDSparse, dims; with=valuenames(x)) - keep = setdiff([1:ndims(x);], map(d->fieldindex(x.index.columns,d), dims)) + keep = setdiff([1:ndims(x);], map(d->fieldindex(columns(x.index),d), dims)) if isempty(keep) throw(ArgumentError("to remove all dimensions, use `reduce(f, A)`")) end - idxs, d = collect_columns(GroupBy(f, keys(x, (keep...,)), rows(x, with), sortpermby(x, (keep...,)))).columns + idxs, d = collect_columns(GroupBy(f, keys(x, (keep...,)), rows(x, with), sortpermby(x, (keep...,)))) |> columns NDSparse(idxs, d, presorted=true, copy=false) end From 9e549518c0c36663517df59a31c93328ee060045 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Mon, 17 Dec 2018 20:11:53 +0000 Subject: [PATCH 10/21] keep naming consistent at Columns --- src/columns.jl | 48 ++++++++++++++++++++++++------------------------ src/reshape.jl | 4 ++-- src/utils.jl | 8 ++++---- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/columns.jl b/src/columns.jl index 1078cacc..0361d081 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -1,7 +1,7 @@ # to get rid of eventually const Columns = StructVector # There is a StackOverflow bug in this case in Base.unaliascopy -Base.copy(c::StructVector{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c +Base.copy(c::Columns{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c # IndexedTable-like API @@ -29,8 +29,8 @@ function colnames end Base.@pure colnames(t::AbstractVector) = (1,) columns(v::AbstractVector) = v -Base.@pure colnames(t::StructVector) = fieldnames(eltype(t)) -Base.@pure colnames(t::StructVector{<:Pair}) = colnames(t.first) => colnames(t.second) +Base.@pure colnames(t::Columns) = fieldnames(eltype(t)) +Base.@pure colnames(t::Columns{<:Pair}) = colnames(t.first) => colnames(t.second) """ columns(itr, select::Selection = All()) @@ -53,9 +53,9 @@ available selection options and syntax. """ function columns end -columns(c::StructVector{<:Tuple}) = Tuple(fieldarrays(c)) -columns(c::StructVector{<:NamedTuple}) = fieldarrays(c) -columns(c::StructVector{<:Pair}) = c.first => c.second +columns(c::Columns{<:Tuple}) = Tuple(fieldarrays(c)) +columns(c::Columns{<:NamedTuple}) = fieldarrays(c) +columns(c::Columns{<:Pair}) = c.first => c.second """ ncols(itr) @@ -68,19 +68,19 @@ Returns the number of columns in `itr`. ncols(rows(([1,2,3],[4,5,6]))) == 2 """ function ncols end -ncols(c::StructVector{T, C}) where {T, C} = fieldcount(C) -ncols(c::StructVector{<:Pair}) = ncols(c.first) => ncols(c.second) +ncols(c::Columns{T, C}) where {T, C} = fieldcount(C) +ncols(c::Columns{<:Pair}) = ncols(c.first) => ncols(c.second) ncols(c::AbstractArray) = 1 -summary(c::StructVector{D}) where {D<:Tuple} = "$(length(c))-element StructVector{$D}" +summary(c::Columns{D}) where {D<:Tuple} = "$(length(c))-element Columns{$D}" -_sizehint!(c::StructVector, n::Integer) = (foreachfield(x->_sizehint!(x,n), c); c) +_sizehint!(c::Columns, n::Integer) = (foreachfield(x->_sizehint!(x,n), c); c) -function _strip_pair(c::StructVector{<:Pair}) +function _strip_pair(c::Columns{<:Pair}) f, s = map(columns, fieldarrays(c)) (f isa AbstractVector) && (f = (f,)) (s isa AbstractVector) && (s = (s,)) - StructVector((f..., s...)) + Columns((f..., s...)) end # fused indexing operations @@ -96,13 +96,13 @@ end # row operations -copyrow!(I::StructVector, i, src) = foreachfield(c->copyelt!(c, i, src), I) -copyrow!(I::StructVector, i, src::StructVector, j) = foreachfield((c1,c2)->copyelt!(c1, i, c2, j), I, src) +copyrow!(I::Columns, i, src) = foreachfield(c->copyelt!(c, i, src), I) +copyrow!(I::Columns, i, src::Columns, j) = foreachfield((c1,c2)->copyelt!(c1, i, c2, j), I, src) copyrow!(I::AbstractArray, i, src::AbstractArray, j) = (@inbounds I[i] = src[j]) -pushrow!(to::StructVector, from::StructVector, i) = foreachfield((a,b)->push!(a, b[i]), to, from) +pushrow!(to::Columns, from::Columns, i) = foreachfield((a,b)->push!(a, b[i]), to, from) pushrow!(to::AbstractArray, from::AbstractArray, i) = push!(to, from[i]) -@generated function rowless(c::StructVector{D,C}, i, j) where {D,C} +@generated function rowless(c::Columns{D,C}, i, j) where {D,C} N = fieldcount(C) ex = :(cmpelts(getfield(fieldarrays(c),$N), i, j) < 0) for n in N-1:-1:1 @@ -115,7 +115,7 @@ pushrow!(to::AbstractArray, from::AbstractArray, i) = push!(to, from[i]) ex end -@generated function roweq(c::StructVector{D,C}, i, j) where {D,C} +@generated function roweq(c::Columns{D,C}, i, j) where {D,C} N = fieldcount(C) ex = :(cmpelts(getfield(fieldarrays(c),1), i, j) == 0) for n in 2:N @@ -128,7 +128,7 @@ end # uses number of columns from `d`, assuming `c` has more or equal # dimensions, for broadcast joins. -@generated function rowcmp(c::StructVector, i, d::StructVector{D}, j) where D +@generated function rowcmp(c::Columns, i, d::Columns{D}, j) where D N = fieldcount(D) ex = :(cmp(getfield(fieldarrays(c),$N)[i], getfield(fieldarrays(d),$N)[j])) for n in N-1:-1:1 @@ -149,7 +149,7 @@ end # all columns are equal except left >= right in last column. # Could be generalized to some number of trailing columns, but I don't # know whether that has applications. -@generated function row_asof(c::StructVector{D,C}, i, d::StructVector{D,C}, j) where {D,C} +@generated function row_asof(c::Columns{D,C}, i, d::Columns{D,C}, j) where {D,C} N = length(C.parameters) if N == 1 ex = :(!isless(getfield(fieldarrays(c),1)[i], getfield(fieldarrays(d),1)[j])) @@ -319,7 +319,7 @@ end column(c, x) = columns(c)[colindex(c, x)] # optimized method -@inline function column(c::StructVector, x::Union{Int, Symbol}) +@inline function column(c::Columns, x::Union{Int, Symbol}) getfield(fieldarrays(c), x) end @@ -391,11 +391,11 @@ the [`select`](@ref) function for selection options and syntax. function rows end rows(x::AbstractVector) = x -rows(cols::Tup) = StructVector(cols) +rows(cols::Tup) = Columns(cols) rows(t, which...) = rows(columns(t, which...)) -_cols_tuple(xs::StructVector) = columns(xs) +_cols_tuple(xs::Columns) = columns(xs) _cols_tuple(xs::AbstractArray) = (xs,) concat_cols(xs, ys) = rows(concat_tup(_cols_tuple(xs), _cols_tuple(ys))) @@ -425,7 +425,7 @@ function ColDict(t; copy=nothing) ColDict(Int[], t, convert(Array{Any}, collect(cnames)), Any[columns(t)...], copy) end -function Base.getindex(d::ColDict{<:StructVector}) +function Base.getindex(d::ColDict{<:Columns}) Columns(d.columns...; names=d.names) end @@ -757,4 +757,4 @@ end ### utils -compact_mem(x::StructVector) = StructVector(map(compact_mem, columns(x))) +compact_mem(x::Columns) = Columns(map(compact_mem, columns(x))) diff --git a/src/reshape.jl b/src/reshape.jl index 7960c4aa..aa540c10 100644 --- a/src/reshape.jl +++ b/src/reshape.jl @@ -26,7 +26,7 @@ function stack(t::D, by = pkeynames(t); select = isa(t, NDSparse) ? valuenames(t labelcol = [label for i in 1:length(t) for label in labels] bycols = map(arg -> repeat(arg, inner = length(valuecols)), columns(t, by)) - convert(collectiontype(D), StructVector(bycols), StructVector(labelcol, valuecol, names = [variable, value])) + convert(collectiontype(D), Columns(bycols), Columns(labelcol, valuecol, names = [variable, value])) end """ @@ -53,7 +53,7 @@ function unstack(t::D, by = pkeynames(t); variable = :variable, value = :value) end function unstack(::Type{D}, ::Type{T}, key, val, cols::AbstractVector{S}) where {D <:Dataset, T, S} - dest_val = StructVector((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols) + dest_val = Columns((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols) for (i, el) in enumerate(val) for (k, v) in el ismissing(columns(dest_val, S(k))[i]) || error("Repeated values with same label are not allowed") diff --git a/src/utils.jl b/src/utils.jl index 08a22afe..ee1ecb39 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -168,18 +168,18 @@ Base.@pure function arrayof(S) if T == Union{} Vector{Union{}} elseif T<:Tuple - StructVector{T, staticschema(Tuple{map(arrayof, fieldtypes(T))...})} + Columns{T, staticschema(Tuple{map(arrayof, fieldtypes(T))...})} elseif T<:NamedTuple if fieldcount(T) == 0 - StructVector{NamedTuple{(), Tuple{}}, NamedTuple{(), Tuple{}}} + Columns{NamedTuple{(), Tuple{}}, NamedTuple{(), Tuple{}}} else - StructVector{T,NamedTuple{fieldnames(T), Tuple{map(arrayof, fieldtypes(T))...}}} + Columns{T,NamedTuple{fieldnames(T), Tuple{map(arrayof, fieldtypes(T))...}}} end elseif (T<:Union{Missing,String,WeakRefString} && Missing<:T) || T<:Union{String, WeakRefString} StringArray{T, 1} elseif T<:Pair - StructVector{T, NamedTuple{(:first, :second), Tuple{map(arrayof, T.parameters)...}}} + Columns{T, NamedTuple{(:first, :second), Tuple{map(arrayof, T.parameters)...}}} else Vector{T} end From 9aa4899bacbc48100dc89362511bbce305c85536 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Mon, 17 Dec 2018 20:23:56 +0000 Subject: [PATCH 11/21] import refine_perm --- src/IndexedTables.jl | 3 ++- src/columns.jl | 4 ++-- src/indexing.jl | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/IndexedTables.jl b/src/IndexedTables.jl index 29ce44c6..f0025a70 100644 --- a/src/IndexedTables.jl +++ b/src/IndexedTables.jl @@ -14,7 +14,8 @@ import Base: tuple_type_cons, tuple_type_head, tuple_type_tail, in, convert -using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema +using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema, + refine_perm! #-----------------------------------------------------------------------# exports export diff --git a/src/columns.jl b/src/columns.jl index 0361d081..0d2d1c03 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -180,11 +180,11 @@ elementwise. Collect output as `Columns` if `f` returns map_rows(i -> (exp = exp(i), log = log(i)), 1:5) """ function map_rows(f, iters...) - collect_structarray(f(i...) for i in zip(iters...)) + collect_columns(f(i...) for i in zip(iters...)) end # 1-arg case -map_rows(f, iter) = collect_structarray(f(i) for i in iter) +map_rows(f, iter) = collect_columns(f(i) for i in iter) ## Special selectors to simplify column selector diff --git a/src/indexing.jl b/src/indexing.jl index 4a808cb0..e22d8ed4 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -42,19 +42,19 @@ isconstrange(col::AbstractVector{T}, idx::T) where {T} = true isconstrange(col, idx::AbstractArray) = isequal(first(idx), last(idx)) function range_estimate(I::Columns, idxs) - r = range_estimate(I.columns[1], idxs[1]) + r = range_estimate(columns(I)[1], idxs[1]) i = 1; n = length(idxs) - while i < n && isconstrange(I.columns[i], idxs[i]) + while i < n && isconstrange(columns(I)[i], idxs[i]) i += 1 - r = intersect(r, range_estimate(I.columns[i], idxs[i], first(r), last(r))) + r = intersect(r, range_estimate(columns(I)[i], idxs[i], first(r), last(r))) end return r end function _getindex(t::NDSparse, idxs) I = t.index - cs = astuple(I.columns) - if fieldcount(typeof(idxs)) !== fieldcount(typeof(I.columns)) + cs = astuple(columns(I)) + if fieldcount(typeof(idxs)) !== fieldcount(typeof(columns(I))) error("wrong number of indices") end for idx in idxs @@ -63,7 +63,7 @@ function _getindex(t::NDSparse, idxs) out = convert(Vector{Int32}, range_estimate(I, idxs)) filter!(i->row_in(cs, i, idxs), out) keepdims = filter(i->eltype(columns(t.index)[i]) != typeof(idxs[i]), 1:length(idxs)) - NDSparse(Columns(map(x->x[out], getsubfields(I.columns, keepdims))), t.data[out], presorted=true) + NDSparse(Columns(map(x->x[out], getsubfields(columns(I), keepdims))), t.data[out], presorted=true) end # iterators over indices - lazy getindex @@ -76,7 +76,7 @@ same index arguments as `getindex`. """ function where(d::NDSparse, idxs::Vararg{Any,N}) where N I = d.index - cs = astuple(I.columns) + cs = astuple(columns(I)) data = d.data rng = range_estimate(I, idxs) (data[i] for i in Iterators.Filter(r->row_in(cs, r, idxs), rng)) @@ -90,7 +90,7 @@ indices. """ function update!(f::Union{Function,Type}, d::NDSparse, idxs::Vararg{Any,N}) where N I = d.index - cs = astuple(I.columns) + cs = astuple(columns(I)) data = d.data rng = range_estimate(I, idxs) for r in rng @@ -111,7 +111,7 @@ Similar to `where`, but returns an iterator giving `index=>value` pairs. """ function pairs(d::NDSparse, idxs::Vararg{Any,N}) where N I = d.index - cs = astuple(I.columns) + cs = astuple(columns(I)) data = d.data rng = range_estimate(I, idxs) (I[i]=>data[i] for i in Compat.Iterators.Filter(r->row_in(cs, r, idxs), rng)) @@ -190,7 +190,7 @@ function _setindex!(d::NDSparse{T,D}, rhs, idxs) where {T,D} end flush!(d) I = d.index - cs = astuple(I.columns) + cs = astuple(columns(I)) data = d.data rng = range_estimate(I, idxs) for r in rng From 197de5637e9ffcc9fb76f3a47966d535370ccaff Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Mon, 17 Dec 2018 21:56:47 +0000 Subject: [PATCH 12/21] avoid multi argument Columns --- src/columns.jl | 2 +- src/ndsparse.jl | 2 +- src/reshape.jl | 4 ++-- test/test_core.jl | 22 +++++++++++----------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/columns.jl b/src/columns.jl index 0d2d1c03..cef4071c 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -426,7 +426,7 @@ function ColDict(t; copy=nothing) end function Base.getindex(d::ColDict{<:Columns}) - Columns(d.columns...; names=d.names) + Columns(Tuple(d.columns); names=d.names) end Base.getindex(d::ColDict, key) = rows(d[], key) diff --git a/src/ndsparse.jl b/src/ndsparse.jl index 44752251..c72a36bb 100644 --- a/src/ndsparse.jl +++ b/src/ndsparse.jl @@ -177,7 +177,7 @@ Construct an NDSparse array from columns. The last argument is the data column, """ function NDSparse(columns...; names=nothing, rest...) keys, data = columns[1:end-1], columns[end] - ndsparse(Columns(keys..., names=names), data; rest...) + ndsparse(Columns(keys, names=names), data; rest...) end similar(t::NDSparse) = NDSparse(similar(t.index, 0), similar(t.data, 0)) diff --git a/src/reshape.jl b/src/reshape.jl index aa540c10..b89257b2 100644 --- a/src/reshape.jl +++ b/src/reshape.jl @@ -26,7 +26,7 @@ function stack(t::D, by = pkeynames(t); select = isa(t, NDSparse) ? valuenames(t labelcol = [label for i in 1:length(t) for label in labels] bycols = map(arg -> repeat(arg, inner = length(valuecols)), columns(t, by)) - convert(collectiontype(D), Columns(bycols), Columns(labelcol, valuecol, names = [variable, value])) + convert(collectiontype(D), Columns(bycols), Columns((labelcol, valuecol), names = [variable, value])) end """ @@ -53,7 +53,7 @@ function unstack(t::D, by = pkeynames(t); variable = :variable, value = :value) end function unstack(::Type{D}, ::Type{T}, key, val, cols::AbstractVector{S}) where {D <:Dataset, T, S} - dest_val = Columns((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols) + dest_val = Columns(Tuple(Array{Union{T, Missing}}(undef, length(val)) for i in cols); names = cols) for (i, el) in enumerate(val) for (k, v) in el ismissing(columns(dest_val, S(k))[i]) || error("Repeated values with same label are not allowed") diff --git a/test/test_core.jl b/test/test_core.jl index b91ccf5d..ffd60df2 100644 --- a/test/test_core.jl +++ b/test/test_core.jl @@ -1,14 +1,14 @@ - c = Columns([1,1,1,2,2], [1,2,4,3,5]) - d = Columns([1,1,2,2,2], [1,3,1,4,5]) - e = Columns([1,1,1], sort([rand(),0.5,rand()])) - f = Columns([1,1,1], sort([rand(),0.5,rand()])) - @test map(+,NDSparse(c,ones(5)),NDSparse(d,ones(5))).index == Columns([1,2],[1,5]) + c = Columns(([1,1,1,2,2], [1,2,4,3,5])) + d = Columns(([1,1,2,2,2], [1,3,1,4,5])) + e = Columns(([1,1,1], sort([rand(),0.5,rand()]))) + f = Columns(([1,1,1], sort([rand(),0.5,rand()]))) + @test map(+,NDSparse(c,ones(5)),NDSparse(d,ones(5))).index == Columns(([1,2],[1,5])) @test length(map(+,NDSparse(e,ones(3)),NDSparse(f,ones(3)))) == 1 @test eltype(c) == Tuple{Int,Int} @test map_rows(i -> (exp = exp(i), log = log(i)), 1:5) == Columns((exp = exp.(1:5), log = log.(1:5))) - @test map_rows(tuple, 1:3, ["a","b","c"]) == Columns([1,2,3], ["a","b","c"]) + @test map_rows(tuple, 1:3, ["a","b","c"]) == Columns(([1,2,3], ["a","b","c"])) c = Columns(Columns((a=[1,2,3],)) => Columns((b=["a","b","c"],))) @test columns(c).first == Columns((a=[1,2,3],)) @@ -33,7 +33,7 @@ empty!(d) @test d == c[Int[]] @test c != Columns((a=[1,2,3], b=["a","b","c"])) - x = Columns([1], [1.0], WeakRefStrings.StringArray(["a"])) + x = Columns(([1], [1.0], WeakRefStrings.StringArray(["a"]))) @test IndexedTables.arrayof(eltype(x)) == typeof(x) @test IndexedTables.arrayof(WeakRefString{UInt8}) == WeakRefStrings.StringArray{WeakRefString{UInt8},1} @test typeof(similar(c, 10)) == typeof(similar(typeof(c), 10)) == typeof(c) @@ -42,10 +42,10 @@ @test sortperm(c) == [1,2,3] permute!(c, [2,3, 1]) @test c == Columns(Columns((a=[2,3,1],)) => Columns((b=["b","c","a"],))) - f = Columns(Columns([1, 1, 2, 2]) => ["b", "a", "c", "d"]) - @test IndexedTables._strip_pair(f) == Columns([1, 1, 2, 2], ["b", "a", "c", "d"]) + f = Columns(Columns(([1, 1, 2, 2],)) => ["b", "a", "c", "d"]) + @test IndexedTables._strip_pair(f) == Columns(([1, 1, 2, 2], ["b", "a", "c", "d"])) @test sortperm(f) == [2, 1, 3, 4] - @test sort(f) == Columns(Columns([1, 1, 2, 2]) => ["a", "b", "c", "d"]) + @test sort(f) == Columns(Columns(([1, 1, 2, 2],)) => ["a", "b", "c", "d"]) @test !issorted(f) #end @@ -93,7 +93,7 @@ end # Tuple output b1 = broadcast((x,y)->(x.a, y.c), t, t1) @test isa(b1.data, Columns) - @test b1 == NDSparse(idx, Columns([5,6], [4,5])) + @test b1 == NDSparse(idx, Columns(([5,6], [4,5]))) b2 = broadcast((x,y)->(m=x.a, n=y.c), t, t1) @test b2 == NDSparse(idx, Columns(m=[5,6], n=[4,5])) From 9259f6ab6d4e4da5ca5c2c3e6748af6aae8a336a Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Mon, 17 Dec 2018 22:18:07 +0000 Subject: [PATCH 13/21] fixed ndsparse core tests --- src/indexedtable.jl | 2 +- src/ndsparse.jl | 2 +- test/test_core.jl | 44 ++++++++++++++++++++++---------------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/indexedtable.jl b/src/indexedtable.jl index 0c7efede..016c27b3 100644 --- a/src/indexedtable.jl +++ b/src/indexedtable.jl @@ -312,7 +312,7 @@ a default key of tuples `(1,):(n,)` is generated. """ function pkeys(t::IndexedTable) if isempty(t.pkey) - Columns(Base.OneTo(length(t))) + Columns((Base.OneTo(length(t)),)) else rows(t, pkeynames(t)) end diff --git a/src/ndsparse.jl b/src/ndsparse.jl index c72a36bb..063fbc75 100644 --- a/src/ndsparse.jl +++ b/src/ndsparse.jl @@ -383,7 +383,7 @@ function convert(::Type{NDSparse}, a::AbstractArray{T}) where T end i += 1 end - NDSparse(Columns(reverse(idxs)...), data, presorted=true) + NDSparse(Columns(Tuple(Iterators.reverse(idxs))), data, presorted=true) end # aggregation diff --git a/test/test_core.jl b/test/test_core.jl index ffd60df2..52643747 100644 --- a/test/test_core.jl +++ b/test/test_core.jl @@ -213,7 +213,7 @@ x = NDSparse(Columns(x = [1,2,3], y = [4,5,6], z = [7,8,9]), [10,11,12]) # test showing -@test repr(ndsparse(Columns([1]), Columns(()))) == """ +@test repr(ndsparse(Columns(([1],)), Columns(()))) == """ 1-d NDSparse with 1 values (0-tuples): 1 │ ──┼ @@ -284,7 +284,7 @@ function foo(n, data=ones(Int, 1)) NDSparse(Columns(t([ones(Int, 1) for i=1:n]...)), data) end -let x = Columns([6,5,4,3,2,2,1],[4,4,4,4,4,4,4],[1,2,3,4,5,6,7]) +let x = Columns(([6,5,4,3,2,2,1],[4,4,4,4,4,4,4],[1,2,3,4,5,6,7])) @test issorted(x[sortperm(x)]) end @@ -300,7 +300,7 @@ let hitemps = NDSparse([fill("New York",3); fill("Boston",3)], [91,76]) end - cs = Columns([1], [2]) + cs = Columns(([1], [2])) t = table(cs) @test t.pkey == Int[] @test t.columns == [(1,2)] @@ -309,22 +309,22 @@ end @test column(t.columns,1) === columns(cs)[1] t = table(cs, copy=false, pkey=[1]) @test column(t.columns,1) === columns(cs)[1] - cs = Columns([2, 1], [3,4]) + cs = Columns(([2, 1], [3,4])) t = table(cs, copy=false, pkey=[1]) @test t.pkey == Int[1] - cs = Columns([2, 1], [3,4]) + cs = Columns(([2, 1], [3,4])) t = table(cs, copy=false, pkey=[1]) @test column(t.columns,1) === columns(cs)[1] @test t.pkey == Int[1] @test t.columns == [(1,4), (2,3)] - cs = Columns(x=[2, 1], y=[3,4]) + cs = Columns((x=[2, 1], y=[3,4])) t = table(cs, copy=false, pkey=:x) @test column(t.columns,1) === columns(cs).x @test t.pkey == Int[1] @test t.columns == [(x=1,y=4), (x=2,y=3)] - cs = Columns([2, 1], [3,4]) + cs = Columns(([2, 1], [3,4])) t = table(cs, presorted=true, pkey=[1]) @test t.pkey == Int[1] @test t.columns == [(2,3), (1,4)] @@ -333,7 +333,7 @@ end b = table([1, 2, 3], [4, 5, 6], names=[:x, :y]) @test table(([1, 2, 3], [4, 5, 6])) == a @test table((x = [1, 2, 3], y = [4, 5, 6])) == b - @test table(Columns([1, 2, 3], [4, 5, 6])) == a + @test table(Columns(([1, 2, 3], [4, 5, 6]))) == a @test table(Columns(x=[1, 2, 3], y=[4, 5, 6])) == b @test b == table(b) b = table([2, 3, 1], [4, 5, 6], names=[:x, :y], pkey=:x) @@ -355,15 +355,15 @@ end @test excludecols([1, 2, 3], (1,)) == () @test convert(IndexedTable, Columns(x=[1, 2], y=[3, 4]), Columns(z=[1, 2]), presorted=true) == table([1, 2], [3, 4], [1, 2], names=Symbol[:x, :y, :z]) @test colnames([1, 2, 3]) == (1,) - @test colnames(Columns([1, 2, 3], [3, 4, 5])) == (1, 2) + @test colnames(Columns(([1, 2, 3], [3, 4, 5]))) == (1, 2) @test colnames(table([1, 2, 3], [3, 4, 5])) == (1, 2) @test colnames(Columns(x=[1, 2, 3], y=[3, 4, 5])) == (:x, :y) @test colnames(table([1, 2, 3], [3, 4, 5], names=[:x, :y])) == (:x, :y) @test colnames(ndsparse(Columns(x=[1, 2, 3]), Columns(y=[3, 4, 5]))) == (:x, :y) @test colnames(ndsparse(Columns(x=[1, 2, 3]), [3, 4, 5])) == (:x, 2) @test colnames(ndsparse(Columns(x=[1, 2, 3]), [3, 4, 5])) == (:x, 2) - @test colnames(ndsparse(Columns([1, 2, 3], [4, 5, 6]), Columns(x=[6, 7, 8]))) == (1, 2, :x) - @test colnames(ndsparse(Columns(x=[1, 2, 3]), Columns([3, 4, 5], [6, 7, 8]))) == (:x, 2, 3) + @test colnames(ndsparse(Columns(([1, 2, 3], [4, 5, 6])), Columns(x=[6, 7, 8]))) == (1, 2, :x) + @test colnames(ndsparse(Columns(x=[1, 2, 3]), Columns(([3, 4, 5], [6, 7, 8])))) == (:x, 2, 3) x = ndsparse(["a", "b"], [3, 4]) @test (keytype(x), eltype(x)) == (Tuple{String}, Int) @@ -381,9 +381,9 @@ end x = ndsparse([1, 2], [3, 4]) @test pkeynames(x) == (1,) - a = Columns([1,2,1],["foo","bar","baz"]) - b = Columns([2,1,1],["bar","baz","foo"]) - c = Columns([1,1,2],["foo","baz","bar"]) + a = Columns(([1,2,1],["foo","bar","baz"])) + b = Columns(([2,1,1],["bar","baz","foo"])) + c = Columns(([1,1,2],["foo","baz","bar"])) @test a != b @test a != c @test b != c @@ -404,16 +404,16 @@ end #78 @test_throws ArgumentError map(x->throw(ArgumentError("x")), a) @inferred Columns((c=[1],)) - @inferred Columns([1]) - @test_throws ErrorException @inferred Columns(c=[1]) # bad + @inferred Columns(([1],)) + @inferred Columns(c=[1]) #@inferred NDSparse(Columns(c=[1]), [1]) #@inferred NDSparse(Columns([1]), [1]) - c = Columns([1,1,1,2,2], [1,2,4,3,5]) - d = Columns([1,1,2,2,2], [1,3,1,4,5]) - e = Columns([1,1,1], sort([rand(),0.5,rand()])) - f = Columns([1,1,1], sort([rand(),0.5,rand()])) - @test merge(NDSparse(c,ones(5)),NDSparse(d,ones(5))).index == Columns([1,1,1,1,2,2,2,2],[1,2,3,4,1,3,4,5]) - @test eltype(merge(NDSparse(c,Columns(ones(Int, 5))),NDSparse(d,Columns(ones(Float64, 5)))).data) == Tuple{Float64} + c = Columns(([1,1,1,2,2], [1,2,4,3,5])) + d = Columns(([1,1,2,2,2], [1,3,1,4,5])) + e = Columns(([1,1,1], sort([rand(),0.5,rand()]))) + f = Columns(([1,1,1], sort([rand(),0.5,rand()]))) + @test merge(NDSparse(c,ones(5)),NDSparse(d,ones(5))).index == Columns(([1,1,1,1,2,2,2,2],[1,2,3,4,1,3,4,5])) + @test eltype(merge(NDSparse(c,Columns((ones(Int, 5),))),NDSparse(d,Columns((ones(Float64, 5),)))).data) == Tuple{Float64} @test eltype(merge(NDSparse(c,Columns(x=ones(Int, 5))),NDSparse(d,Columns(x=ones(Float64, 5)))).data) == typeof((x=0.,)) @test length(merge(NDSparse(e,ones(3)),NDSparse(f,ones(3)))) == 5 @test vcat(Columns(x=[1]), Columns(x=[1.0])) == Columns(x=[1,1.0]) From 9c3987fbccf608027c09d2b47d7dec63a04033ba Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Mon, 17 Dec 2018 23:10:56 +0000 Subject: [PATCH 14/21] fixed core tests --- src/indexedtable.jl | 2 +- src/reduce.jl | 4 ++-- test/test_core.jl | 22 +++++++++++----------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/indexedtable.jl b/src/indexedtable.jl index 016c27b3..3f8f0be2 100644 --- a/src/indexedtable.jl +++ b/src/indexedtable.jl @@ -141,7 +141,7 @@ function table(cs::Tup; chunks=nothing, kwargs...) end table(cs::Columns; kwargs...) = table(columns(cs); kwargs...) -table(c::Columns{<:Pair}; kwargs...) = convert(IndexedTable, c.columns.first, c.columns.second; kwargs...) +table(c::Columns{<:Pair}; kwargs...) = convert(IndexedTable, columns(c).first, columns(c).second; kwargs...) function table(cols::AbstractArray...; names=nothing, kwargs...) if isa(names, AbstractArray) && all(x->isa(x, Symbol), names) diff --git a/src/reduce.jl b/src/reduce.jl index bfd303a4..b0c7baf9 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -251,14 +251,14 @@ init_func(ac::ApplyColwise{<:Tuple}, t::AbstractVector) = Tuple(Symbol(n) => f for (f, n) in zip(ac.functions, ac.names)) function init_func(ac::ApplyColwise{<:Tuple}, t::Columns) if ac.stack - dd -> Columns(collect(colnames(t)), ([f(x) for x in columns(dd)] for f in ac.functions)...; names = vcat(ac.variable, ac.names)) + dd -> Columns((collect(colnames(t)), ([f(x) for x in columns(dd)] for f in ac.functions)...); names = vcat(ac.variable, ac.names)) else Tuple(Symbol(s, :_, n) => s => f for s in colnames(t), (f, n) in zip(ac.functions, ac.names)) end end init_func(ac::ApplyColwise, t::Columns) = - ac.stack ? dd -> Columns(collect(colnames(t)), [ac.functions(x) for x in columns(dd)]; names = vcat(ac.variable, ac.names)) : + ac.stack ? dd -> Columns((collect(colnames(t)), [ac.functions(x) for x in columns(dd)]); names = vcat(ac.variable, ac.names)) : Tuple(s => s => ac.functions for s in colnames(t)) init_func(ac::ApplyColwise, t::AbstractVector) = ac.functions diff --git a/test/test_core.jl b/test/test_core.jl index 52643747..68ea1106 100644 --- a/test/test_core.jl +++ b/test/test_core.jl @@ -679,11 +679,11 @@ end @test c[12,52] == 67 @test c[32,34] == 160 @test length(c.index) == 2 - @test naturaljoin(a, b) == NDSparse([12,32], [52,34], Columns([11,150], [56,10])) + @test naturaljoin(a, b) == NDSparse([12,32], [52,34], Columns(([11,150], [56,10]))) - c = NDSparse([12,32], [52,34], Columns([0,1], [2,3])) - @test naturaljoin(a, c) == NDSparse([12,32], [52,34], Columns([11,150], [0,1], [2,3])) - @test naturaljoin(c, a) == NDSparse([12,32], [52,34], Columns([0,1], [2,3], [11,150])) + c = NDSparse([12,32], [52,34], Columns(([0,1], [2,3]))) + @test naturaljoin(a, c) == NDSparse([12,32], [52,34], Columns(([11,150], [0,1], [2,3]))) + @test naturaljoin(c, a) == NDSparse([12,32], [52,34], Columns(([0,1], [2,3], [11,150]))) @test isequal( leftjoin(t1, t2, lselect=2, rselect=2), @@ -702,12 +702,12 @@ end @test isequal(leftjoin(NDSparse([1,1,1,2], [2,3,4,4], [5,6,7,8]), NDSparse([1,1,3], [2,4,4], [9,10,12])), - NDSparse([1,1,1,2], [2,3,4,4], Columns([5, 6, 7, 8], [9, missing, 10, missing]))) + NDSparse([1,1,1,2], [2,3,4,4], Columns(([5, 6, 7, 8], [9, missing, 10, missing])))) @test isequal( leftjoin(NDSparse([1,1,1,2], [2,3,4,4], [5,6,7,8]), NDSparse([1,1,2], [2,4,4], [9,10,12])), - NDSparse([1,1,1,2], [2,3,4,4], Columns([5, 6, 7, 8], [9, missing, 10, 12]))) + NDSparse([1,1,1,2], [2,3,4,4], Columns(([5, 6, 7, 8], [9, missing, 10, 12])))) @test isequal(outerjoin(t1, t2, lselect=2, rselect=2), table([0,1,2,3,4,5], [missing, 5,6,7,8,missing], [5,missing,missing,6,7,8])) @@ -1093,7 +1093,7 @@ end A = [1] # shouldn't mutate input mapslices(x, [:a]) do slice - NDSparse(Columns(A), A) + NDSparse(Columns((A,)), A) end @test A == [1] @@ -1115,15 +1115,15 @@ end @test t==NDSparse(Columns(a_1=[1], a_2=[2], c=[2]), Columns(d=[1])) # signleton slices - x=NDSparse(Columns([1,2]),Columns([1,2])) + x=NDSparse(Columns(([1,2],)),Columns(([1,2],))) @test_throws ErrorException mapslices(x,()) do slice true end t = mapslices(x,()) do slice - @test slice == NDSparse(Columns([1]), Columns([1])) || slice == NDSparse(Columns([2]), Columns([2])) - NDSparse(Columns([1]), ([1])) + @test slice == NDSparse(Columns(([1],)), Columns(([1],))) || slice == NDSparse(Columns(([2],)), Columns(([2],))) + NDSparse(Columns(([1],)), ([1])) end - @test t == NDSparse(Columns([1,2], [1,1]), [1,1]) + @test t == NDSparse(Columns(([1,2], [1,1])), [1,1]) x = NDSparse([1,1,1,2,2,2,3,3],[1,2,3,4,5,6,7,8],rand(8)); y = mapslices(t -> (1, 2), x, 2) From f66e29033c8e7045dafade603d3cbd5a418ab7f4 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Tue, 18 Dec 2018 11:00:04 +0000 Subject: [PATCH 15/21] use old copy behavior --- src/indexedtable.jl | 4 ++-- src/ndsparse.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/indexedtable.jl b/src/indexedtable.jl index 3f8f0be2..64e0ab7e 100644 --- a/src/indexedtable.jl +++ b/src/indexedtable.jl @@ -101,10 +101,10 @@ function table(::Val{:serial}, cols::Tup; cs = permute!(cs, perm) end elseif copy - cs = Base.copy(cs) + cs = copyto!(similar(cs), cs) end elseif copy - cs = Base.copy(cs) + cs = copyto!(similar(cs), cs) end intpkey = map(k->colindex(cs, k), pkey) diff --git a/src/ndsparse.jl b/src/ndsparse.jl index 063fbc75..a83b77ee 100644 --- a/src/ndsparse.jl +++ b/src/ndsparse.jl @@ -91,8 +91,8 @@ function ndsparse(::Val{:serial}, ks::Tup, vs::Union{Tup, AbstractVector}; I, d = collect_columns(iter) |> columns agg = nothing else - I = Base.copy(I) - d = Base.copy(d) + I = copyto!(similar(I), I) + d = copyto!(similar(d), d) end end stripnames(x) = isa(x, Columns) ? rows(astuple(columns(x))) : rows((x,)) From 621caae7a6095fae59b4873877e634420db523b8 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Tue, 18 Dec 2018 11:08:45 +0000 Subject: [PATCH 16/21] fix flatten --- src/flatten.jl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/flatten.jl b/src/flatten.jl index 9f75e762..4de17ecd 100644 --- a/src/flatten.jl +++ b/src/flatten.jl @@ -16,10 +16,10 @@ function dedup_names(ns) end function mapslices(f, x::NDSparse, dims; name = nothing) - iterdims = setdiff([1:ndims(x);], map(d->fieldindex(x.index.columns,d), dims)) - idx = Any[Colon() for v in x.index.columns] + iterdims = setdiff([1:ndims(x);], map(d->fieldindex(columns(x.index),d), dims)) + idx = Any[Colon() for v in columns(x.index)] - iter = Columns(astuple(x.index.columns)[[iterdims...]]) + iter = Columns(astuple(columns(x.index))[[iterdims...]]) if !isempty(dims) || !issorted(iter) iter = sort(iter) end @@ -48,7 +48,7 @@ function mapslices(f, x::NDSparse, dims; name = nothing) for j=1:n @inbounds index_first[j] = iter[1] end - index = Columns(index_first.columns..., astuple(copy(y.index).columns)...; names=ns) + index = Columns((columns(index_first)..., astuple(columns(copy(y.index)))...); names=ns) data = copy(y.data) output = NDSparse(index, data) if isempty(dims) @@ -61,7 +61,7 @@ function mapslices(f, x::NDSparse, dims; name = nothing) if !all(x->isa(x, Symbol), ns) ns = nothing end - index = Columns(iter[1:1].columns...; names=ns) + index = Columns(Tuple(columns(iter[1:1])); names=ns) if isa(y, Tup) vec = convert(Columns, [y]) else @@ -70,7 +70,7 @@ function mapslices(f, x::NDSparse, dims; name = nothing) if name === nothing output = NDSparse(index, vec) else - output = NDSparse(index, Columns(vec, names=[name])) + output = NDSparse(index, Columns(Tuple(columns(vec)), names=[name])) end if isempty(dims) error("calling mapslices with no dimensions and scalar return value -- use map instead") @@ -81,7 +81,7 @@ function mapslices(f, x::NDSparse, dims; name = nothing) end function _mapslices_scalar!(f, output, x, iter, iterdims, start, coerce) - idx = Any[Colon() for v in x.index.columns] + idx = Any[Colon() for v in columns(x.index)] for i = start:length(iter) if i != 1 && roweq(iter, i-1, i) # We've already visited this slice @@ -105,15 +105,15 @@ function _mapslices_itable_singleton!(f, output, x, start) I = output.index D = output.data - I1 = Columns(I.columns[1:ndims(x)]) - I2 = Columns(I.columns[ndims(x)+1:end]) + I1 = Columns(columns(I)[1:ndims(x)]) + I2 = Columns(columns(I)[ndims(x)+1:end]) i = start for i in start:length(x) k = x.index[i] y = f(NDSparse(x.index[i:i], x.data[i:i])) n = length(y) - foreach((x,y)->append_n!(x,y,n), I1.columns, k) + foreach((x,y)->append_n!(x,y,n), columns(I1), k) append!(I2, y.index) append!(D, y.data) end @@ -121,13 +121,13 @@ function _mapslices_itable_singleton!(f, output, x, start) end function _mapslices_itable!(f, output, x, iter, iterdims, start) - idx = Any[Colon() for v in x.index.columns] + idx = Any[Colon() for v in columns(x.index)] I = output.index D = output.data initdims = length(iterdims) - I1 = Columns(getsubfields(I.columns, 1:initdims)) # filled from existing table - I2 = Columns(getsubfields(I.columns, initdims+1:fieldcount(typeof(I.columns)))) # filled from output tables + I1 = Columns(getsubfields(columns(I), 1:initdims)) # filled from existing table + I2 = Columns(getsubfields(columns(I), initdims+1:fieldcount(typeof(columns(I))))) # filled from output tables for i = start:length(iter) if i != 1 && roweq(iter, i-1, i) # We've already visited this slice @@ -144,7 +144,7 @@ function _mapslices_itable!(f, output, x, iter, iterdims, start) y = f(subtable) n = length(y) - foreach((x,y)->append_n!(x,y,n), I1.columns, iter[i]) + foreach((x,y)->append_n!(x,y,n), columns(I1), iter[i]) append!(I2, y.index) append!(D, y.data) end From 9a85be88bdf2bd8ed6c210597c653c4f4b7babd1 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Tue, 18 Dec 2018 11:15:51 +0000 Subject: [PATCH 17/21] fix all tests --- src/collect.jl | 6 +++--- test/test_collect.jl | 4 ++-- test/test_utils.jl | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/collect.jl b/src/collect.jl index 47166287..e856d9dc 100644 --- a/src/collect.jl +++ b/src/collect.jl @@ -98,7 +98,7 @@ function collect_columns_flattened(itr, el::Pair, st) end function collect_columns_flattened!(dest::Columns{<:Pair}, itr, el::Pair, st) - dest_key, dest_data = dest.columns + dest_key, dest_data = columns(dest) while true elem = iterate(itr, st) elem === nothing && break @@ -178,7 +178,7 @@ function widencolumns(dest, i, el::S, ::Type{T}) where{S, T} end function widencolumns(dest::Columns{<:Pair}, i, el::Pair, ::Type{Pair{T1, T2}}) where{T1, T2} - dest1 = fieldwise_isa(el.first, T1) ? dest.columns.first : widencolumns(dest.columns.first, i, el.first, T1) - dest2 = fieldwise_isa(el.second, T2) ? dest.columns.second : widencolumns(dest.columns.second, i, el.second, T2) + dest1 = fieldwise_isa(el.first, T1) ? columns(dest).first : widencolumns(columns(dest).first, i, el.first, T1) + dest2 = fieldwise_isa(el.second, T2) ? columns(dest).second : widencolumns(columns(dest).second, i, el.second, T2) Columns(dest1 => dest2) end diff --git a/test/test_collect.jl b/test/test_collect.jl index 610f54c3..c6a52b8b 100644 --- a/test/test_collect.jl +++ b/test/test_collect.jl @@ -63,11 +63,11 @@ end # empty itr = Iterators.filter(t -> t > 10, 1:8) tuple_itr = ((i+1, i-1) for i in itr) - @test collect_columns(tuple_itr) == Columns(Int[], Int[]) + @test collect_columns(tuple_itr) == Columns((Int[], Int[])) itr = (i for i in 0:-1) tuple_itr = ((i+1, i-1) for i in itr) - @test collect_columns(tuple_itr) == Columns(Int[], Int[]) + @test collect_columns(tuple_itr) == Columns((Int[], Int[])) end @testset "collectscalars" begin diff --git a/test/test_utils.jl b/test/test_utils.jl index 264758aa..9a68a859 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -13,9 +13,9 @@ let a = [1:10;] @test IndexedTables._sizehint!(a, 20) === a end -@test Columns([1,2], [3,4]) == Columns([1,2], [3.0,4.0]) -@test Columns([1,2], [3,4]) != Columns([1,2], [3.0,4.1]) -@test Columns([1,2], [3,4]) != Columns(a=[1,2], b=[3,4]) +@test Columns(([1,2], [3,4])) == Columns(([1,2], [3.0,4.0])) +@test Columns(([1,2], [3,4])) != Columns(([1,2], [3.0,4.1])) +@test Columns(([1,2], [3,4])) != Columns((a=[1,2], b=[3,4])) function roundtrips(x) b = IOBuffer() @@ -24,13 +24,13 @@ function roundtrips(x) return deserialize(b) == x end -@test roundtrips(Columns(rand(5), rand(5))) +@test roundtrips(Columns((rand(5), rand(5)))) @test roundtrips(Columns(c1 = rand(5), c2 = rand(5))) @test roundtrips(convert(NDSparse, rand(3,3))) @test roundtrips(NDSparse(Columns(y=rand(3), x=rand(3)), rand(3))) let x = rand(3), y = rand(3), v = rand(3), w = rand(3) - @test vcat(Columns(x,y), Columns(v,w)) == Columns(vcat(x,v), vcat(y,w)) + @test vcat((Columns((x,y))), Columns((v,w))) == Columns((vcat(x,v), vcat(y,w))) @test vcat(Columns(x=x,y=y), Columns(x=v,y=w)) == Columns(x=vcat(x,v), y=vcat(y,w)) end @@ -60,5 +60,5 @@ let # 97 x = ndsparse((t=[0.01, 0.05],), (x=[1,2], y=[3,4],)) - @test map(p->(r = sum(p),), x).data == Columns([4,6], names=[:r]) + @test map(p->(r = sum(p),), x).data == Columns(([4,6],), names=[:r]) end From 33a617c9ecc9bcb86b9b0d44bb0b82ba38973691 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Tue, 18 Dec 2018 11:20:51 +0000 Subject: [PATCH 18/21] switch to structarray collection mechanism --- src/IndexedTables.jl | 4 +- src/collect.jl | 121 ++----------------------------------------- test/test_collect.jl | 43 ++++++--------- test/test_core.jl | 2 +- 4 files changed, 25 insertions(+), 145 deletions(-) diff --git a/src/IndexedTables.jl b/src/IndexedTables.jl index f0025a70..6e8960f8 100644 --- a/src/IndexedTables.jl +++ b/src/IndexedTables.jl @@ -14,8 +14,8 @@ import Base: tuple_type_cons, tuple_type_head, tuple_type_tail, in, convert -using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema, - refine_perm! +using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema, ArrayInitializer, + refine_perm!, collect_structarray, collect_empty_structarray, grow_to_structarray!, collect_to_structarray! #-----------------------------------------------------------------------# exports export diff --git a/src/collect.jl b/src/collect.jl index e856d9dc..276ed515 100644 --- a/src/collect.jl +++ b/src/collect.jl @@ -1,4 +1,4 @@ -_is_subtype(::Type{S}, ::Type{T}) where {S, T} = promote_type(S, T) == T +const default_initializer = ArrayInitializer(t -> t<:Union{Tuple, NamedTuple, Pair}) """ collect_columns(itr) @@ -14,50 +14,11 @@ Collect an iterable as a `Columns` object if it iterates `Tuples` or `NamedTuple s2 = Iterators.filter(isodd, 1:8) collect_columns(s2) """ -collect_columns(itr) = collect_columns(itr, Base.IteratorSize(itr)) +collect_columns(itr) = collect_structarray(itr, initializer = default_initializer) +collect_empty_columns(itr) = collect_empty_structarray(itr, initializer = default_initializer) -function collect_empty_columns(itr::T) where {T} - S = Core.Compiler.return_type(first, Tuple{T}) - similar(arrayof(S), 0) -end - -function collect_columns(@nospecialize(itr), ::Union{Base.HasShape, Base.HasLength}) - st = iterate(itr) - st === nothing && return collect_empty_columns(itr) - el, i = st - dest = similar(arrayof(typeof(el)), length(itr)) - dest[1] = el - collect_to_columns!(dest, itr, 2, i) -end - -function collect_to_columns!(dest::AbstractArray{T}, itr, offs, st) where {T} - # collect to dest array, checking the type of each result. if a result does not - # match, widen the result type and re-dispatch. - i = offs - while true - elem = iterate(itr, st) - elem === nothing && break - el, st = elem - if fieldwise_isa(el, T) - @inbounds dest[i] = el - i += 1 - else - new = widencolumns(dest, i, el, T) - @inbounds new[i] = el - return collect_to_columns!(new, itr, i+1, st) - end - end - return dest -end - -function collect_columns(itr, ::Base.SizeUnknown) - elem = iterate(itr) - elem === nothing && return collect_empty_columns(itr) - el, st = elem - dest = similar(arrayof(typeof(el)), 1) - dest[1] = el - grow_to_columns!(dest, itr, iterate(itr, st)) -end +grow_to_columns!(args...) = grow_to_structarray!(args...) +collect_to_columns!(args...) = collect_to_structarray!(args...) function collect_columns_flattened(itr) elem = iterate(itr) @@ -110,75 +71,3 @@ function collect_columns_flattened!(dest::Columns{<:Pair}, itr, el::Pair, st) return Columns(dest_key => dest_data) end -function grow_to_columns!(dest::AbstractArray{T}, itr, elem = iterate(itr)) where {T} - # collect to dest array, checking the type of each result. if a result does not - # match, widen the result type and re-dispatch. - i = length(dest)+1 - while elem !== nothing - el, st = elem - if fieldwise_isa(el, T) - push!(dest, el) - elem = iterate(itr, st) - i += 1 - else - new = widencolumns(dest, i, el, T) - push!(new, el) - return grow_to_columns!(new, itr, iterate(itr, st)) - end - end - return dest -end - -# extra methods if we have widened to Vector{Tuple} or Vector{NamedTuple} -# better to not generate as this is the case where the user is sending heterogenoeus data -fieldwise_isa(el::S, ::Type{Tuple}) where {S<:Tup} = _is_subtype(S, Tuple) -fieldwise_isa(el::S, ::Type{NamedTuple}) where {S<:Tup} = _is_subtype(S, NamedTuple) - -@generated function fieldwise_isa(el::S, ::Type{T}) where {S<:Tup, T<:Tup} - if (fieldnames(S) == fieldnames(T)) && all(_is_subtype(s, t) for (s, t) in zip(fieldtypes(S), fieldtypes(T))) - return :(true) - else - return :(false) - end -end - -@generated function fieldwise_isa(el::S, ::Type{T}) where {S, T} - if _is_subtype(S, T) - return :(true) - else - return :(false) - end -end - -fieldwise_isa(el::Pair, ::Type{Pair{T1, T2}}) where {T1, T2} = - fieldwise_isa(el.first, T1) && fieldwise_isa(el.second, T2) - -function widencolumns(dest, i, el::S, ::Type{T}) where{S <: Tup, T<:Tup} - if fieldnames(S) != fieldnames(T) || T == Tuple || T == NamedTuple - R = (S <: Tuple) && (T <: Tuple) ? Tuple : (S <: NamedTuple) && (T <: NamedTuple) ? NamedTuple : Any - new = Array{R}(undef, length(dest)) - copyto!(new, 1, dest, 1, i-1) - else - sp, tp = fieldtypes(S), fieldtypes(T) - idx = findall(collect(!(s <: t) for (s, t) in zip(sp, tp))) - new = dest - for l in idx - newcol = Vector{promote_type(sp[l], tp[l])}(undef, length(dest)) - copyto!(newcol, 1, column(dest, l), 1, i-1) - new = setcol(new, l, newcol) - end - end - new -end - -function widencolumns(dest, i, el::S, ::Type{T}) where{S, T} - new = Vector{promote_type(S, T)}(undef, length(dest)) - copyto!(new, 1, dest, 1, i-1) - new -end - -function widencolumns(dest::Columns{<:Pair}, i, el::Pair, ::Type{Pair{T1, T2}}) where{T1, T2} - dest1 = fieldwise_isa(el.first, T1) ? columns(dest).first : widencolumns(columns(dest).first, i, el.first, T1) - dest2 = fieldwise_isa(el.second, T2) ? columns(dest).second : widencolumns(columns(dest).second, i, el.second, T2) - Columns(dest1 => dest2) -end diff --git a/test/test_collect.jl b/test/test_collect.jl index c6a52b8b..a4200669 100644 --- a/test/test_collect.jl +++ b/test/test_collect.jl @@ -10,16 +10,16 @@ @inferred IndexedTables.collect_to_columns!(dest, itr, 2, st) v = [(a = 1, b = 2), (a = 1.2, b = 3)] - @test collect_columns(v) == Columns((a = [1, 1.2], b = Int[2, 3])) - @test typeof(collect_columns(v)) == typeof(Columns((a = [1, 1.2], b = Int[2, 3]))) + @test collect_columns(v) == Columns((a = Real[1, 1.2], b = Int[2, 3])) + @test typeof(collect_columns(v)) == typeof(Columns((a = Real[1, 1.2], b = Int[2, 3]))) v = [(a = 1, b = 2), (a = 1.2, b = "3")] - @test collect_columns(v) == Columns((a = [1, 1.2], b = Any[2, "3"])) - @test typeof(collect_columns(v)) == typeof(Columns((a = [1, 1.2], b = Any[2, "3"]))) + @test collect_columns(v) == Columns((a = Real[1, 1.2], b = Any[2, "3"])) + @test typeof(collect_columns(v)) == typeof(Columns((a = Real[1, 1.2], b = Any[2, "3"]))) v = [(a = 1, b = 2), (a = 1.2, b = 2), (a = 1, b = "3")] - @test collect_columns(v) == Columns((a = [1, 1.2, 1], b = Any[2, 2, "3"])) - @test typeof(collect_columns(v)) == typeof(Columns((a = [1, 1.2, 1], b = Any[2, 2, "3"]))) + @test collect_columns(v) == Columns((a = Real[1, 1.2, 1], b = Any[2, 2, "3"])) + @test typeof(collect_columns(v)) == typeof(Columns((a = Real[1, 1.2, 1], b = Any[2, 2, "3"]))) # length unknown itr = Iterators.filter(isodd, 1:8) @@ -44,21 +44,21 @@ end @inferred collect_columns(v) v = [(1, 2), (1.2, 3)] - @test collect_columns(v) == Columns(([1, 1.2], Int[2, 3])) + @test collect_columns(v) == Columns((Real[1, 1.2], Int[2, 3])) v = [(1, 2), (1.2, "3")] - @test collect_columns(v) == Columns(([1, 1.2], Any[2, "3"])) - @test typeof(collect_columns(v)) == typeof(Columns(([1, 1.2], Any[2, "3"]))) + @test collect_columns(v) == Columns((Real[1, 1.2], Any[2, "3"])) + @test typeof(collect_columns(v)) == typeof(Columns((Real[1, 1.2], Any[2, "3"]))) v = [(1, 2), (1.2, 2), (1, "3")] - @test collect_columns(v) == Columns(([1, 1.2, 1], Any[2, 2, "3"])) + @test collect_columns(v) == Columns((Real[1, 1.2, 1], Any[2, 2, "3"])) # length unknown itr = Iterators.filter(isodd, 1:8) tuple_itr = ((i+1, i-1) for i in itr) @test collect_columns(tuple_itr) == Columns(([2, 4, 6, 8], [0, 2, 4, 6])) tuple_itr_real = (i == 1 ? (1.2, i-1) : (i+1, i-1) for i in itr) - @test collect_columns(tuple_itr_real) == Columns(([1.2, 4, 6, 8], [0, 2, 4, 6])) - @test typeof(collect_columns(tuple_itr_real)) == typeof(Columns(([1.2, 4, 6, 8], [0, 2, 4, 6]))) + @test collect_columns(tuple_itr_real) == Columns((Real[1.2, 4, 6, 8], [0, 2, 4, 6])) + @test typeof(collect_columns(tuple_itr_real)) == typeof(Columns((Real[1.2, 4, 6, 8], [0, 2, 4, 6]))) # empty itr = Iterators.filter(t -> t > 10, 1:8) @@ -82,7 +82,7 @@ end @test collect_columns(itr) == collect(itr) real_itr = (i == 1 ? 1.5 : i for i in itr) @test collect_columns(real_itr) == collect(real_itr) - @test eltype(collect_columns(real_itr)) == Float64 + @test eltype(collect_columns(real_itr)) == Real #empty itr = Iterators.filter(t -> t > 10, 1:8) @@ -104,8 +104,8 @@ end @test eltype(collect_columns(v)) == Pair{Int, Int} v = (i == 1 ? (1.2 => i+1) : (i => i+1) for i in 1:3) - @test collect_columns(v) == Columns([1.2,2,3]=>[2,3,4]) - @test eltype(collect_columns(v)) == Pair{Float64, Int} + @test collect_columns(v) == Columns(Real[1.2,2,3]=>[2,3,4]) + @test eltype(collect_columns(v)) == Pair{Real, Int} v = ((a=i,) => (b="a$i",) for i in 1:3) @test collect_columns(v) == Columns(Columns((a = [1,2,3],))=>Columns((b = ["a1","a2","a3"],))) @@ -128,21 +128,12 @@ end @test isequal(t, table((b = [1,1,1], a = [2, missing, 3]), pkey = :b)) end -@testset "issubtype" begin - @test IndexedTables._is_subtype(Int, Int) - @test IndexedTables._is_subtype(Int, Union{Missing, Int}) - @test !IndexedTables._is_subtype(Union{Missing, Int}, Int) - @test IndexedTables._is_subtype(Union{Missing, Int}, Union{Missing, Int}) - @test !IndexedTables._is_subtype(Union{Missing, Int}, Union{Missing,String}) - @test !IndexedTables._is_subtype(Int, String) -end - @testset "collectflattened" begin t = [(:a => [1, 2]), (:b => [1, 3])] @test collect_columns_flattened(t) == Columns([:a, :a, :b, :b] => [1, 2, 1, 3]) t = ([(a = 1,), (a = 2,)], [(a = 1.1,), (a = 2.2,)]) - @test collect_columns_flattened(t) == Columns(a = [1, 2, 1.1, 2.2]) - @test eltype(collect_columns_flattened(t)) == typeof((a=1.1,)) + @test collect_columns_flattened(t) == Columns(a = Real[1, 2, 1.1, 2.2]) + @test eltype(collect_columns_flattened(t)) == NamedTuple{(:a,), Tuple{Real}} t = [(:a => table(1:2, ["a", "b"])), (:b => table(3:4, ["c", "d"]))] @test table(collect_columns_flattened(t)) == table([:a, :a, :b, :b], 1:4, ["a", "b", "c", "d"], pkey = 1) end diff --git a/test/test_core.jl b/test/test_core.jl index 68ea1106..28889f4c 100644 --- a/test/test_core.jl +++ b/test/test_core.jl @@ -618,7 +618,7 @@ end t3 = map(x->ntuple(identity, x.x), t) @test isa(t3.data, Vector) - @test eltype(t3.data) == Tuple + @test eltype(t3.data) == Tuple{Int64,Int64,Int64,Int64,Vararg{Int64,N} where N} y = [1, 1//2, "x"] function foo(x) From dd7862ec8a18185ac4126bfe5c7d4150d046b7d1 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Tue, 18 Dec 2018 16:34:22 +0000 Subject: [PATCH 19/21] Remove type piracy --- src/columns.jl | 2 -- src/tables.jl | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/columns.jl b/src/columns.jl index cef4071c..c00abbfc 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -1,7 +1,5 @@ # to get rid of eventually const Columns = StructVector -# There is a StackOverflow bug in this case in Base.unaliascopy -Base.copy(c::Columns{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c # IndexedTable-like API diff --git a/src/tables.jl b/src/tables.jl index 816aa767..e1684bba 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -1,10 +1,7 @@ #-----------------------------------------------------------------------# Columns const TableColumns = Columns{T} where {T<:NamedTuple} -# Columns(x; kw...) = Columns(Tables.columntable(x); kw...) - Tables.istable(::Type{<:TableColumns}) = true -Tables.materializer(c::TableColumns) = Columns Tables.rowaccess(c::TableColumns) = true Tables.rows(c::TableColumns) = c From 0458261483f8b0fc29d89e987dc7168137b88860 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Tue, 18 Dec 2018 17:06:29 +0000 Subject: [PATCH 20/21] clean up tables integration --- src/columns.jl | 2 +- src/tables.jl | 15 +-------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/src/columns.jl b/src/columns.jl index c00abbfc..74226fba 100644 --- a/src/columns.jl +++ b/src/columns.jl @@ -51,8 +51,8 @@ available selection options and syntax. """ function columns end +columns(c::Columns) = fieldarrays(c) columns(c::Columns{<:Tuple}) = Tuple(fieldarrays(c)) -columns(c::Columns{<:NamedTuple}) = fieldarrays(c) columns(c::Columns{<:Pair}) = c.first => c.second """ diff --git a/src/tables.jl b/src/tables.jl index e1684bba..7bc93967 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -1,18 +1,5 @@ -#-----------------------------------------------------------------------# Columns -const TableColumns = Columns{T} where {T<:NamedTuple} - -Tables.istable(::Type{<:TableColumns}) = true - -Tables.rowaccess(c::TableColumns) = true -Tables.rows(c::TableColumns) = c -Tables.schema(c::TableColumns) = Tables.Schema(colnames(c), Tuple(map(eltype, c.columns))) - -Tables.columnaccess(c::TableColumns) = true -Tables.columns(c::TableColumns) = c.columns -# Tables.schema already defined for NamedTuple of Vectors (c.columns) - #-----------------------------------------------------------------------# IndexedTable -Tables.istable(::Type{IndexedTable{C}}) where {C<:TableColumns} = true +Tables.istable(::Type{IndexedTable{C}}) where {C<:Columns} = Tables.istable(C) Tables.materializer(t::IndexedTable) = table for f in [:rowaccess, :rows, :columnaccess, :columns, :schema] @eval Tables.$f(t::IndexedTable) = Tables.$f(Columns(columns(t))) From 3a0eeeb9693eaaa58ebc8165711a7248402ad6a0 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Tue, 18 Dec 2018 19:28:06 +0000 Subject: [PATCH 21/21] lower bound structarray --- REQUIRE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/REQUIRE b/REQUIRE index c61eff94..4780ae6e 100644 --- a/REQUIRE +++ b/REQUIRE @@ -6,4 +6,4 @@ TableTraits 0.3.0 TableTraitsUtils 0.2.0 IteratorInterfaceExtensions 0.1.0 Tables -StructArrays +StructArrays 0.2.0