From 5e4fb7248b8ffc2659e5342dfe01d0de6b0c1476 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Sun, 16 Dec 2018 13:29:12 +0000
Subject: [PATCH 01/21] wip: alias Columns to StructArrays.StructVector

---
 REQUIRE        |   3 +-
 src/columns.jl | 266 +++++++++----------------------------------------
 2 files changed, 51 insertions(+), 218 deletions(-)

diff --git a/REQUIRE b/REQUIRE
index 8bf6de22..c61eff94 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -5,4 +5,5 @@ WeakRefStrings 0.4.4
 TableTraits 0.3.0
 TableTraitsUtils 0.2.0
 IteratorInterfaceExtensions 0.1.0
-Tables
\ No newline at end of file
+Tables
+StructArrays
diff --git a/src/columns.jl b/src/columns.jl
index 9b443671..a0336e61 100644
--- a/src/columns.jl
+++ b/src/columns.jl
@@ -1,47 +1,9 @@
-"""
-Wrapper around a (named) tuple of Vectors that acts like a Vector of (named) tuples.
-
-# Fields:
-
-- `columns`: a (named) tuple of Vectors. Also `columns(x)`
-"""
-struct Columns{D<:Union{Tup, Pair}, C<:Union{Tup, Pair}} <: AbstractVector{D}
-    columns::C
-
-    function Columns{D,C}(c) where {D<:Tup,C<:Tup}
-        if !isempty(c)
-            n = length(c[1])
-            for i = 2:length(c)
-                length(c[i]) == n || error("all columns must have same length")
-            end
-        end
-        new{D,C}(c)
-    end
-
-    function Columns{D,C}(c::Pair) where {D<:Pair,C<:Pair{<:AbstractVector, <:AbstractVector}}
-        length(c.first) == length(c.second) || error("all columns must have same length")
-        new{D,C}(c)
-    end
-end
-
-function Columns(cols::AbstractVector...; names::Union{Vector,Tuple{Vararg{Any}},Nothing}=nothing)
-    if isa(names, Nothing) || any(x->!(x isa Symbol), names)
-        Columns{eltypes(typeof(cols)),typeof(cols)}(cols)
-    else
-        dt = NamedTuple{(names...,), Tuple{map(eltype, cols)...}}
-        ct = NamedTuple{(names...,), Tuple{map(typeof, cols)...}}
-        Columns{dt,ct}(ct((cols...,)))
-    end
-end
-
-function Columns(; kws...)
-    Columns(values(kws)..., names=collect(keys(kws)))
-end
-
-Columns(c::Union{Tup, Pair}) = Columns{eltypes(typeof(c)),typeof(c)}(c)
+using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray
 
+# to get rid of eventually
+const Columns = StructVector
 # There is a StackOverflow bug in this case in Base.unaliascopy
-Base.copy(c::Columns{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c
+Base.copy(c::StructVector{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c
 
 # IndexedTable-like API
 
@@ -69,8 +31,8 @@ function colnames end
 Base.@pure colnames(t::AbstractVector) = (1,)
 columns(v::AbstractVector) = v
 
-Base.@pure colnames(t::Columns) = fieldnames(eltype(t))
-Base.@pure colnames(t::Columns{<:Pair, <:Pair}) = colnames(t.columns.first) => colnames(t.columns.second)
+Base.@pure colnames(t::StructVector) = fieldnames(eltype(t))
+Base.@pure colnames(t::StructVector{<:Pair, <:Pair}) = colnames(t.first) => colnames(t.second)
 
 """
     columns(itr, select::Selection = All())
@@ -93,16 +55,9 @@ available selection options and syntax.
 """
 function columns end
 
-columns(c::Columns) = c.columns
-
-# Array-like API
-
-eltype(::Type{Columns{D,C}}) where {D,C} = D
-function length(c::Columns)
-    isempty(c.columns) ? 0 : length(c.columns[1])
-end
-length(c::Columns{<:Pair, <:Pair}) = length(c.columns.first)
-ndims(c::Columns) = 1
+columns(c::StructVector{<:Tuple}) = Tuple(fieldarrays(c))
+columns(c::StructVector{<:NamedTuple}) = fieldarrays(c)
+columns(c::StructVector{<:Pair}) = c.first => c.second
 
 """
     ncols(itr)
@@ -115,149 +70,19 @@ Returns the number of columns in `itr`.
     ncols(rows(([1,2,3],[4,5,6]))) == 2
 """
 function ncols end
-ncols(c::Columns) = fieldcount(typeof(c.columns))
-ncols(c::Columns{<:Pair, <:Pair}) = ncols(c.columns.first) => ncols(c.columns.second)
+ncols(c::StructVector) = fieldcount(fieldarrays(c))
+ncols(c::StructVector{<:Pair}) = ncols(c.first) => ncols(c.second)
 ncols(c::AbstractArray) = 1
 
-size(c::Columns) = (length(c),)
-Base.IndexStyle(::Type{<:Columns}) = IndexLinear()
-summary(c::Columns{D}) where {D<:Tuple} = "$(length(c))-element Columns{$D}"
-
-empty!(c::Columns) = (foreach(empty!, c.columns); c)
-empty!(c::Columns{<:Pair, <:Pair}) = (foreach(empty!, c.columns.first.columns); foreach(empty!, c.columns.second.columns); c)
-
-function similar(c::Columns{D,C}) where {D,C}
-    cols = _map(similar, c.columns)
-    Columns{D,typeof(cols)}(cols)
-end
-
-function similar(c::Columns{D,C}, n::Integer) where {D,C}
-    cols = _map(a->similar(a,n), c.columns)
-    Columns{D,typeof(cols)}(cols)
-end
-
-function Base.similar(::Type{T}, n::Int)::T where {T<:Columns}
-    T_cols = T.parameters[2]
-    if T_cols <: Pair
-        return Columns(similar(T_cols.parameters[1], n) => similar(T_cols.parameters[2], n))
-    end
-    f = T_cols <: Tuple ? tuple : T_cols∘tuple
-    T(f(map(t->similar(t, n), fieldtypes(T_cols))...))
-end
-
-function convert(::Type{Columns}, x::AbstractArray{<:NTuple{N,Any}}) where N
-    eltypes = (eltype(x).parameters...,)
-    copyto!(Columns(map(t->Vector{t}(undef, length(x)), eltypes)), x)
-end
-
-function convert(::Type{Columns}, x::AbstractArray{<:NamedTuple{names, typs}}) where {names,typs}
-    eltypes = typs.parameters
-    copyto!(Columns(map(t->Vector{t}(undef, length(x)), eltypes)..., names=fieldnames(eltype(x))), x)
-end
-
+summary(c::StructVector{D}) where {D<:Tuple} = "$(length(c))-element StructVector{$D}"
 
-getindex(c::Columns{D}, i::Integer) where {D<:Tuple} = ith_all(i, c.columns)
-getindex(c::Columns{D}, i::Integer) where {D<:NamedTuple} = D(ith_all(i, c.columns))
-getindex(c::Columns{D}, i::Integer) where {D<:Pair} = getindex(c.columns.first, i) => getindex(c.columns.second, i)
+_sizehint!(c::StructVector, n::Integer) = (foreachfield(x->_sizehint!(x,n), c); c)
 
-getindex(c::Columns, p::AbstractVector) = Columns(_map(c->c[p], c.columns))
-
-view(c::Columns, I) = Columns(_map(a->view(a, I), c.columns))
-
-@inline setindex!(I::Columns, r::Union{Tup, Pair}, i::Integer) = (foreach((c,v)->(c[i]=v), I.columns, r); I)
-
-@inline push!(I::Columns, r::Union{Tup, Pair}) = (foreach(push!, I.columns, r); I)
-
-append!(I::Columns, J::Columns) = (foreach(append!, I.columns, J.columns); I)
-
-copyto!(I::Columns, J::Columns) = (foreach(copyto!, I.columns, J.columns); I)
-
-resize!(I::Columns, n::Int) = (foreach(c->resize!(c,n), I.columns); I)
-
-_sizehint!(c::Columns, n::Integer) = (foreach(c->_sizehint!(c,n), c.columns); c)
-
-==(x::Columns, y::Columns) = x.columns == y.columns
-
-function _strip_pair(c::Columns{<:Pair})
-    f, s = map(columns, c.columns)
+function _strip_pair(c::StructVector{<:Pair})
+    f, s = map(columns, fieldarrays(c))
     (f isa AbstractVector) && (f = (f,))
     (s isa AbstractVector) && (s = (s,))
-    Columns(f..., s...)
-end
-
-function sortperm(c::Columns)
-    cols = c.columns
-    x = cols[1]
-    if (eltype(x) <: AbstractString && !(x isa PooledArray)) || length(cols) > 1
-        pa = PooledArray(compact_mem(x))
-        p = sortperm_fast(pa)
-    else
-        p = sortperm_fast(x)
-    end
-    if length(cols) > 1
-        y = cols[2]
-        refine_perm!(p, cols, 1, compact_mem(x), compact_mem(y), 1, length(x))
-    end
-    return p
-end
-
-sortperm(c::Columns{<:Pair}) = sortperm(_strip_pair(c))
-
-issorted(c::Columns) = issorted(1:length(c), lt=(x,y)->rowless(c, x, y))
-issorted(c::Columns{<:Pair}) = issorted(_strip_pair(c))
-
-# assuming x[p] is sorted, sort by remaining columns where x[p] is constant
-function refine_perm!(p, cols, c, x, y, lo, hi)
-    temp = similar(p, 0)
-    order = Base.Order.By(j->(@inbounds k=y[j]; k))
-    nc = length(cols)
-    i = lo
-    while i < hi
-        i1 = i+1
-        @inbounds while i1 <= hi && roweq(x, p[i1], p[i])
-            i1 += 1
-        end
-        i1 -= 1
-        if i1 > i
-            sort_sub_by!(p, i, i1, y, order, temp)
-            if c < nc-1
-                z = cols[c+2]
-                refine_perm!(p, cols, c+1, compact_mem(y), compact_mem(z), i, i1)
-            end
-        end
-        i = i1+1
-    end
-end
-
-function permute!(c::Columns, p::AbstractVector)
-    for v in c.columns
-        if isa(v, PooledArrays.PooledArray) || isa(v, StringArray{String})
-            permute!(v, p)
-        else
-            copyto!(v, v[p])
-        end
-    end
-    return c
-end
-permute!(c::Columns{<:Pair}, p::AbstractVector) = (permute!(c.columns.first, p); permute!(c.columns.second, p); c)
-sort!(c::Columns) = permute!(c, sortperm(c))
-sort(c::Columns) = c[sortperm(c)]
-
-function Base.vcat(c::Columns, cs::Columns...)
-    fns = map(fieldnames∘typeof, (map(x->x.columns, (c, cs...))))
-    f1 = fns[1]
-    for f2 in fns[2:end]
-        if f1 != f2
-            errfields = join(map(string, fns), ", ", " and ")
-            throw(ArgumentError("Cannot concatenate columns with fields $errfields"))
-        end
-    end
-    Columns(map(vcat, map(x->x.columns, (c,cs...))...))
-end
-
-function Base.vcat(c::Columns{<:Pair}, cs::Columns{<:Pair}...)
-    Columns(vcat(c.columns.first, (x.columns.first for x in cs)...) =>
-            vcat(c.columns.second, (x.columns.second for x in cs)...))
+    StructVector((f..., s...))
 end
 
 # fused indexing operations
@@ -273,18 +98,18 @@ end
 
 # row operations
 
-copyrow!(I::Columns, i, src) = foreach(c->copyelt!(c, i, src), I.columns)
-copyrow!(I::Columns, i, src::Columns, j) = foreach((c1,c2)->copyelt!(c1, i, c2, j), I.columns, src.columns)
+copyrow!(I::StructVector, i, src) = foreachfield(c->copyelt!(c, i, src), I)
+copyrow!(I::StructVector, i, src::StructVector, j) = foreachfield((c1,c2)->copyelt!(c1, i, c2, j), I, src)
 copyrow!(I::AbstractArray, i, src::AbstractArray, j) = (@inbounds I[i] = src[j])
-pushrow!(to::Columns, from::Columns, i) = foreach((a,b)->push!(a, b[i]), to.columns, from.columns)
+pushrow!(to::StructVector, from::StructVector, i) = foreachfield((a,b)->push!(a, b[i]), to, from)
 pushrow!(to::AbstractArray, from::AbstractArray, i) = push!(to, from[i])
 
-@generated function rowless(c::Columns{D,C}, i, j) where {D,C}
+@generated function rowless(c::StructVector{D,C}, i, j) where {D,C}
     N = fieldcount(C)
-    ex = :(cmpelts(getfield(c.columns,$N), i, j) < 0)
+    ex = :(cmpelts(getfield(fieldarrays(c),$N), i, j) < 0)
     for n in N-1:-1:1
         ex = quote
-            let d = cmpelts(getfield(c.columns,$n), i, j)
+            let d = cmpelts(getfield(fieldarrays(c),$n), i, j)
                 (d == 0) ? ($ex) : (d < 0)
             end
         end
@@ -292,11 +117,11 @@ pushrow!(to::AbstractArray, from::AbstractArray, i) = push!(to, from[i])
     ex
 end
 
-@generated function roweq(c::Columns{D,C}, i, j) where {D,C}
+@generated function roweq(c::StructVector{D,C}, i, j) where {D,C}
     N = fieldcount(C)
-    ex = :(cmpelts(getfield(c.columns,1), i, j) == 0)
+    ex = :(cmpelts(getfield(fieldarrays(c),1), i, j) == 0)
     for n in 2:N
-        ex = :(($ex) && (cmpelts(getfield(c.columns,$n), i, j)==0))
+        ex = :(($ex) && (cmpelts(getfield(fieldarrays(c),$n), i, j)==0))
     end
     ex
 end
@@ -305,12 +130,12 @@ end
 
 # uses number of columns from `d`, assuming `c` has more or equal
 # dimensions, for broadcast joins.
-@generated function rowcmp(c::Columns, i, d::Columns{D}, j) where D
+@generated function rowcmp(c::StructVector, i, d::StructVector{D}, j) where D
     N = fieldcount(D)
-    ex = :(cmp(getfield(c.columns,$N)[i], getfield(d.columns,$N)[j]))
+    ex = :(cmp(getfield(fieldarrays(c),$N)[i], getfield(fieldarrays(d),$N)[j]))
     for n in N-1:-1:1
         ex = quote
-            let k = cmp(getfield(c.columns,$n)[i], getfield(d.columns,$n)[j])
+            let k = cmp(getfield(fieldarrays(c),$n)[i], getfield(fieldarrays(d),$n)[j])
                 (k == 0) ? ($ex) : k
             end
         end
@@ -326,18 +151,18 @@ end
 # all columns are equal except left >= right in last column.
 # Could be generalized to some number of trailing columns, but I don't
 # know whether that has applications.
-@generated function row_asof(c::Columns{D,C}, i, d::Columns{D,C}, j) where {D,C}
+@generated function row_asof(c::StructVector{D,C}, i, d::StructVector{D,C}, j) where {D,C}
     N = length(C.parameters)
     if N == 1
-        ex = :(!isless(getfield(c.columns,1)[i], getfield(d.columns,1)[j]))
+        ex = :(!isless(getfield(fieldarrays(c),1)[i], getfield(fieldarrays(d),1)[j]))
     else
-        ex = :(isequal(getfield(c.columns,1)[i], getfield(d.columns,1)[j]))
+        ex = :(isequal(getfield(fieldarrays(c),1)[i], getfield(fieldarrays(d),1)[j]))
     end
     for n in 2:N
         if N == n
-            ex = :(($ex) && !isless(getfield(c.columns,$n)[i], getfield(d.columns,$n)[j]))
+            ex = :(($ex) && !isless(getfield(fieldarrays(c),$n)[i], getfield(fieldarrays(d),$n)[j]))
         else
-            ex = :(($ex) && isequal(getfield(c.columns,$n)[i], getfield(d.columns,$n)[j]))
+            ex = :(($ex) && isequal(getfield(fieldarrays(c),$n)[i], getfield(fieldarrays(d),$n)[j]))
         end
     end
     ex
@@ -357,11 +182,11 @@ elementwise. Collect output as `Columns` if `f` returns
     map_rows(i -> (exp = exp(i), log = log(i)), 1:5)
 """
 function map_rows(f, iters...)
-    collect_columns(f(i...) for i in zip(iters...))
+    collect_structarray(f(i...) for i in zip(iters...))
 end
 
 # 1-arg case
-map_rows(f, iter) = collect_columns(f(i) for i in iter)
+map_rows(f, iter) = collect_structarray(f(i) for i in iter)
 
 ## Special selectors to simplify column selector
 
@@ -496,8 +321,8 @@ end
 column(c, x) = columns(c)[colindex(c, x)]
 
 # optimized method
-@inline function column(c::Columns, x::Union{Int, Symbol})
-    getfield(c.columns, x)
+@inline function column(c::StructVector, x::Union{Int, Symbol})
+    getfield(fieldarrays(c), x)
 end
 
 column(t, a::AbstractArray) = a
@@ -568,11 +393,11 @@ the [`select`](@ref) function for selection options and syntax.
 function rows end
 
 rows(x::AbstractVector) = x
-rows(cols::Tup) = Columns(cols)
+rows(cols::Tup) = StructVector(cols)
 
 rows(t, which...) = rows(columns(t, which...))
 
-_cols_tuple(xs::Columns) = columns(xs)
+_cols_tuple(xs::StructVector) = columns(xs)
 _cols_tuple(xs::AbstractArray) = (xs,)
 concat_cols(xs, ys) = rows(concat_tup(_cols_tuple(xs), _cols_tuple(ys)))
 
@@ -602,8 +427,15 @@ function ColDict(t; copy=nothing)
     ColDict(Int[], t, convert(Array{Any}, collect(cnames)), Any[columns(t)...], copy)
 end
 
-function Base.getindex(d::ColDict{<:Columns})
-    Columns(d.columns...; names=d.names)
+function structvector_columnsnames(cols::AbstractVector...; names = Symbol[])
+    if all(t -> isa(t, Symbol), names) && length(names) == length(cols)
+        StructVector(NamedTuple{Tuple(names)}(cols))
+    else
+        StructVector(cols)
+    end
+end
+function Base.getindex(d::ColDict{<:StructVector})
+    structvector_columnsnames(d.columns; names=d.names)
 end
 
 Base.getindex(d::ColDict, key) = rows(d[], key)
@@ -934,4 +766,4 @@ end
 
 ### utils
 
-compact_mem(x::Columns) = Columns(map(compact_mem, columns(x)))
+compact_mem(x::StructVector) = StructVector(map(compact_mem, columns(x)))

From c79b9735065c4ea83bbc14befc29949b8c78bfec Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Sun, 16 Dec 2018 13:54:33 +0000
Subject: [PATCH 02/21] temporarily disable the Columns(x; kw...) method

---
 src/tables.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tables.jl b/src/tables.jl
index eedb264b..816aa767 100644
--- a/src/tables.jl
+++ b/src/tables.jl
@@ -1,7 +1,7 @@
 #-----------------------------------------------------------------------# Columns 
 const TableColumns = Columns{T} where {T<:NamedTuple}
 
-Columns(x; kw...) = Columns(Tables.columntable(x); kw...)
+# Columns(x; kw...) = Columns(Tables.columntable(x); kw...)
 
 Tables.istable(::Type{<:TableColumns}) = true
 Tables.materializer(c::TableColumns) = Columns

From 1eac5fae70a8dc4c6a619a2adaf7b70e1908522a Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Sun, 16 Dec 2018 15:51:24 +0000
Subject: [PATCH 03/21] update arrayof

---
 src/IndexedTables.jl | 2 ++
 src/columns.jl       | 2 --
 src/utils.jl         | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/IndexedTables.jl b/src/IndexedTables.jl
index 9cbee999..29ce44c6 100644
--- a/src/IndexedTables.jl
+++ b/src/IndexedTables.jl
@@ -14,6 +14,8 @@ import Base:
     tuple_type_cons, tuple_type_head, tuple_type_tail, in, convert
 
 
+using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema
+
 #-----------------------------------------------------------------------# exports
 export 
     # macros
diff --git a/src/columns.jl b/src/columns.jl
index a0336e61..d434178e 100644
--- a/src/columns.jl
+++ b/src/columns.jl
@@ -1,5 +1,3 @@
-using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray
-
 # to get rid of eventually
 const Columns = StructVector
 # There is a StackOverflow bug in this case in Base.unaliascopy
diff --git a/src/utils.jl b/src/utils.jl
index a2465847..08a22afe 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -168,18 +168,18 @@ Base.@pure function arrayof(S)
     if T == Union{}
         Vector{Union{}}
     elseif T<:Tuple
-        Columns{T, Tuple{map(arrayof, fieldtypes(T))...}}
+        StructVector{T, staticschema(Tuple{map(arrayof, fieldtypes(T))...})}
     elseif T<:NamedTuple
         if fieldcount(T) == 0
-            Columns{NamedTuple{(), Tuple{}}, NamedTuple{(), Tuple{}}}
+            StructVector{NamedTuple{(), Tuple{}}, NamedTuple{(), Tuple{}}}
         else
-            Columns{T,NamedTuple{fieldnames(T), Tuple{map(arrayof, fieldtypes(T))...}}}
+            StructVector{T,NamedTuple{fieldnames(T), Tuple{map(arrayof, fieldtypes(T))...}}}
         end
     elseif (T<:Union{Missing,String,WeakRefString} && Missing<:T) ||
         T<:Union{String, WeakRefString}
         StringArray{T, 1}
     elseif T<:Pair
-        Columns{T, Pair{map(arrayof, T.parameters)...}}
+        StructVector{T, NamedTuple{(:first, :second), Tuple{map(arrayof, T.parameters)...}}}
     else
         Vector{T}
     end

From d41ed577985c994c07e035c155436f8bf1b159e4 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Sun, 16 Dec 2018 16:33:59 +0000
Subject: [PATCH 04/21] fix ncols

---
 src/columns.jl      | 2 +-
 src/indexedtable.jl | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/columns.jl b/src/columns.jl
index d434178e..f1844f75 100644
--- a/src/columns.jl
+++ b/src/columns.jl
@@ -68,7 +68,7 @@ Returns the number of columns in `itr`.
     ncols(rows(([1,2,3],[4,5,6]))) == 2
 """
 function ncols end
-ncols(c::StructVector) = fieldcount(fieldarrays(c))
+ncols(c::StructVector{T, C}) where {T, C} = fieldcount(C)
 ncols(c::StructVector{<:Pair}) = ncols(c.first) => ncols(c.second)
 ncols(c::AbstractArray) = 1
 
diff --git a/src/indexedtable.jl b/src/indexedtable.jl
index dab871ce..0c7efede 100644
--- a/src/indexedtable.jl
+++ b/src/indexedtable.jl
@@ -384,12 +384,12 @@ keyword arguments will be forwarded to [`table`](@ref) constructor.
 # Example
     convert(IndexedTable, Columns(x=[1,2],y=[3,4]), Columns(z=[1,2]), presorted=true)
 """
-function convert(::Type{IndexedTable}, key, val; kwargs...)
+function convert(::Type{IndexedTable}, key, val; pkey = (), kwargs...)
     cs = concat_cols(key, val)
-    table(cs, pkey=[1:ncols(key);]; kwargs...)
+    table(cs; pkey=Tuple(1:ncols(key)), kwargs...)
 end
 
-convert(T::Type{IndexedTable}, c::Columns{<:Pair}; kwargs...) = convert(T, c.columns.first, c.columns.second; kwargs...)
+convert(T::Type{IndexedTable}, c::Columns{<:Pair}; kwargs...) = convert(T, c.first, c.second; kwargs...)
 # showing
 
 global show_compact_when_wide = true

From 828cee598d488f3e567b41fea99b56d827262993 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Sun, 16 Dec 2018 16:36:48 +0000
Subject: [PATCH 05/21] fix ndims

---
 src/ndsparse.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ndsparse.jl b/src/ndsparse.jl
index 6109e431..cd0d4467 100644
--- a/src/ndsparse.jl
+++ b/src/ndsparse.jl
@@ -209,7 +209,7 @@ _convert(::Type{<:Tuple}, tup::Tuple) = tup
 _convert(::Type{T}, tup::Tuple) where {T<:NamedTuple} = T(tup)
 convertkey(t::NDSparse{V,K,I}, tup::Tuple) where {V,K,I} = _convert(eltype(I), tup)
 
-ndims(t::NDSparse) = length(t.index.columns)
+ndims(t::NDSparse) = length(fieldarrays(t.index))
 length(t::NDSparse) = (flush!(t);length(t.index))
 eltype(::Type{NDSparse{T,D,C,V}}) where {T,D,C,V} = T
 Base.keytype(::Type{NDSparse{T,D,C,V}}) where {T,D,C,V} = D

From 46d9d7a27150bad6913c341be76bdb2915cb93a8 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Sun, 16 Dec 2018 16:48:56 +0000
Subject: [PATCH 06/21] fix stack unstack

---
 src/reshape.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/reshape.jl b/src/reshape.jl
index aa540c10..db014093 100644
--- a/src/reshape.jl
+++ b/src/reshape.jl
@@ -26,7 +26,7 @@ function stack(t::D, by = pkeynames(t); select = isa(t, NDSparse) ? valuenames(t
     labelcol = [label for i in 1:length(t) for label in labels]
 
     bycols = map(arg -> repeat(arg, inner = length(valuecols)), columns(t, by))
-    convert(collectiontype(D), Columns(bycols), Columns(labelcol, valuecol, names = [variable, value]))
+    convert(collectiontype(D), StructVector(bycols), structvector_columnsnames(labelcol, valuecol, names = [variable, value]))
 end
 
 """
@@ -53,7 +53,7 @@ function unstack(t::D, by = pkeynames(t); variable = :variable, value = :value)
 end
 
 function unstack(::Type{D}, ::Type{T}, key, val, cols::AbstractVector{S}) where {D <:Dataset, T, S}
-    dest_val = Columns((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols)
+    dest_val = structvector_columnsnames((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols)
     for (i, el) in enumerate(val)
         for (k, v) in el
             ismissing(columns(dest_val, S(k))[i]) || error("Repeated values with same label are not allowed")

From 43734214807f8e5a12f27c1c331f75146443754d Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Mon, 17 Dec 2018 18:09:42 +0000
Subject: [PATCH 07/21] fix more columns: use columns(x) in join.jl and tests

---
 src/join.jl       | 12 ++++++------
 test/test_core.jl | 28 ++++++++++++++--------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/join.jl b/src/join.jl
index 1ac36a2b..0f86b83b 100644
--- a/src/join.jl
+++ b/src/join.jl
@@ -342,7 +342,7 @@ function Base.join(f, left::Dataset, right::Dataset;
         lnulls = zeros(Bool, length(lout))
         lnulls[lnull_idx] .= true
         lout = if lout isa Columns
-            Columns(map(lout.columns) do col
+            Columns(map(columns(lout)) do col
                 v = convert(Vector{Union{Missing, eltype(col)}}, col)
                 v[lnull_idx] .= missing
                 v
@@ -359,7 +359,7 @@ function Base.join(f, left::Dataset, right::Dataset;
         rnulls = zeros(Bool, length(rout))
         rnulls[rnull_idx] .= true
         rout = if rout isa Columns
-            Columns(map(rout.columns) do col
+            Columns(map(columns(rout)) do col
                 v = convert(Vector{Union{Missing, eltype(col)}}, col)
                 v[rnull_idx] .= missing
                 v
@@ -512,7 +512,7 @@ function count_overlap(I::Columns{D}, J::Columns{D}) where D
 end
 
 function promoted_similar(x::Columns, y::Columns, n)
-    Columns(map((a,b)->promoted_similar(a, b, n), x.columns, y.columns))
+    Columns(map((a,b)->promoted_similar(a, b, n), columns(x), columns(y)))
 end
 
 function promoted_similar(x::AbstractArray, y::AbstractArray, n)
@@ -697,7 +697,7 @@ function find_corresponding(Ap, Bp)
 end
 
 function match_indices(A::NDSparse, B::NDSparse)
-    if isa(A.index.columns, NamedTuple) && isa(B.index.columns, NamedTuple)
+    if isa(columns(A.index), NamedTuple) && isa(columns(B.index), NamedTuple)
         Ap = colnames(A.index)
         Bp = colnames(B.index)
     else
@@ -786,7 +786,7 @@ function _broadcast!(f::Function, A::NDSparse, B::NDSparse, C::NDSparse; dimmap=
     end
     common = filter(i->C_inds[i] > 0, 1:ndims(A))
     C_common = C_inds[common]
-    B_common_cols = Columns(getsubfields(B.index.columns, common))
+    B_common_cols = Columns(getsubfields(columns(B.index), common))
     B_perm = sortperm(B_common_cols)
     if C_common == C_dims
         idx, iperm = _bcast_loop!(f, values(A), B, C, B_common_cols, B_perm)
@@ -797,7 +797,7 @@ function _broadcast!(f::Function, A::NDSparse, B::NDSparse, C::NDSparse; dimmap=
         end
     else
         # TODO
-        #C_perm = sortperm(Columns(C.index.columns[[C_common...]]))
+        #C_perm = sortperm(Columns(columns(C.index)[[C_common...]]))
         error("dimensions of one argument to `broadcast` must be a subset of the dimensions of the other")
     end
     return A
diff --git a/test/test_core.jl b/test/test_core.jl
index e4c17936..b91ccf5d 100644
--- a/test/test_core.jl
+++ b/test/test_core.jl
@@ -11,8 +11,8 @@
     @test map_rows(tuple, 1:3, ["a","b","c"]) == Columns([1,2,3], ["a","b","c"])
 
  c = Columns(Columns((a=[1,2,3],)) => Columns((b=["a","b","c"],)))
-    @test c.columns.first == Columns((a=[1,2,3],))
-    @test c.columns.second == Columns((b=["a","b","c"],))
+    @test columns(c).first == Columns((a=[1,2,3],))
+    @test columns(c).second == Columns((b=["a","b","c"],))
     @test colnames(c) == ((:a,) => (:b,))
     @test length(c) == 3
     @test ncols(c) == (1 => 1)
@@ -107,10 +107,10 @@ end
     @test broadcast(*, nd, ndv) == convert(NDSparse, S .* v)
     # test matching dimensions by name
     ndt0 = convert(NDSparse, sparse(S .* (v')))
-    ndt = NDSparse(Columns(a=ndt0.index.columns[1], b=ndt0.index.columns[2]), ndt0.data, presorted=true)
+    ndt = NDSparse(Columns(a=columns(ndt0.index)[1], b=columns(ndt0.index)[2]), ndt0.data, presorted=true)
     @test broadcast(*,
-                    NDSparse(Columns(a=nd.index.columns[1], b=nd.index.columns[2]), nd.data),
-                    NDSparse(Columns(b=ndv.index.columns[1]), ndv.data)) == ndt
+                    NDSparse(Columns(a=columns(nd.index)[1], b=columns(nd.index)[2]), nd.data),
+                    NDSparse(Columns(b=columns(ndv.index)[1]), ndv.data)) == ndt
 
 let a = rand(10), b = rand(10), c = rand(10)
     @test NDSparse(a, b, c) == NDSparse(a, b, c)
@@ -195,7 +195,7 @@ for a in (rand(2,2), rand(3,5))
     end
 end
 
-_colnames(x::NDSparse) = keys(x.index.columns)
+_colnames(x::NDSparse) = keys(columns(x.index))
 
 @test _colnames(NDSparse(ones(2),ones(2),ones(2),names=[:a,:b])) == (:a, :b)
 @test _colnames(NDSparse(Columns(x=ones(2),y=ones(2)), ones(2))) == (:x, :y)
@@ -304,23 +304,23 @@ end
     t = table(cs)
     @test t.pkey == Int[]
     @test t.columns == [(1,2)]
-    @test column(t.columns,1) !== cs.columns[1]
+    @test column(t.columns,1) !== columns(cs)[1]
     t = table(cs, copy=false)
-    @test column(t.columns,1) === cs.columns[1]
+    @test column(t.columns,1) === columns(cs)[1]
     t = table(cs, copy=false, pkey=[1])
-    @test column(t.columns,1) === cs.columns[1]
+    @test column(t.columns,1) === columns(cs)[1]
     cs = Columns([2, 1], [3,4])
     t = table(cs, copy=false, pkey=[1])
     @test t.pkey == Int[1]
     cs = Columns([2, 1], [3,4])
     t = table(cs, copy=false, pkey=[1])
-    @test column(t.columns,1) === cs.columns[1]
+    @test column(t.columns,1) === columns(cs)[1]
     @test t.pkey == Int[1]
     @test t.columns == [(1,4), (2,3)]
 
     cs = Columns(x=[2, 1], y=[3,4])
     t = table(cs, copy=false, pkey=:x)
-    @test column(t.columns,1) === cs.columns.x
+    @test column(t.columns,1) === columns(cs).x
     @test t.pkey == Int[1]
     @test t.columns == [(x=1,y=4), (x=2,y=3)]
 
@@ -614,7 +614,7 @@ end
 
     t2 = map(x->(x.x,x.x^2), t)
     @test isa(t2.data, Columns)
-    @test isa(t2.data.columns, Tuple{Vector{Int}, Vector{Int}})
+    @test isa(columns(t2.data), Tuple{Vector{Int}, Vector{Int}})
 
     t3 = map(x->ntuple(identity, x.x), t)
     @test isa(t3.data, Vector)
@@ -1054,10 +1054,10 @@ end
     C = rand(3,3)
     nA = convert(NDSparse, A)
     nB = convert(NDSparse, B)
-    nB.index.columns[1][:] .+= 3
+    columns(nB.index)[1][:] .+= 3
     @test merge(nA,nB) == convert(NDSparse, vcat(A,B))
     nC = convert(NDSparse, C)
-    nC.index.columns[1][:] .+= 6
+    columns(nC.index)[1][:] .+= 6
     @test merge(nA,nB,nC) == merge(nA,nC,nB) == convert(NDSparse, vcat(A,B,C))
     merge!(nA,nB)
     @test nA == convert(NDSparse, vcat(A,B))

From 91b2c333703e190eef66d523ebf72c17452ab6bc Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Mon, 17 Dec 2018 18:39:32 +0000
Subject: [PATCH 08/21] use new StructVector method

---
 src/columns.jl | 11 ++---------
 src/reshape.jl |  4 ++--
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/src/columns.jl b/src/columns.jl
index f1844f75..1078cacc 100644
--- a/src/columns.jl
+++ b/src/columns.jl
@@ -30,7 +30,7 @@ Base.@pure colnames(t::AbstractVector) = (1,)
 columns(v::AbstractVector) = v
 
 Base.@pure colnames(t::StructVector) = fieldnames(eltype(t))
-Base.@pure colnames(t::StructVector{<:Pair, <:Pair}) = colnames(t.first) => colnames(t.second)
+Base.@pure colnames(t::StructVector{<:Pair}) = colnames(t.first) => colnames(t.second)
 
 """
     columns(itr, select::Selection = All())
@@ -425,15 +425,8 @@ function ColDict(t; copy=nothing)
     ColDict(Int[], t, convert(Array{Any}, collect(cnames)), Any[columns(t)...], copy)
 end
 
-function structvector_columnsnames(cols::AbstractVector...; names = Symbol[])
-    if all(t -> isa(t, Symbol), names) && length(names) == length(cols)
-        StructVector(NamedTuple{Tuple(names)}(cols))
-    else
-        StructVector(cols)
-    end
-end
 function Base.getindex(d::ColDict{<:StructVector})
-    structvector_columnsnames(d.columns; names=d.names)
+    Columns(d.columns...; names=d.names)
 end
 
 Base.getindex(d::ColDict, key) = rows(d[], key)
diff --git a/src/reshape.jl b/src/reshape.jl
index db014093..7960c4aa 100644
--- a/src/reshape.jl
+++ b/src/reshape.jl
@@ -26,7 +26,7 @@ function stack(t::D, by = pkeynames(t); select = isa(t, NDSparse) ? valuenames(t
     labelcol = [label for i in 1:length(t) for label in labels]
 
     bycols = map(arg -> repeat(arg, inner = length(valuecols)), columns(t, by))
-    convert(collectiontype(D), StructVector(bycols), structvector_columnsnames(labelcol, valuecol, names = [variable, value]))
+    convert(collectiontype(D), StructVector(bycols), StructVector(labelcol, valuecol, names = [variable, value]))
 end
 
 """
@@ -53,7 +53,7 @@ function unstack(t::D, by = pkeynames(t); variable = :variable, value = :value)
 end
 
 function unstack(::Type{D}, ::Type{T}, key, val, cols::AbstractVector{S}) where {D <:Dataset, T, S}
-    dest_val = structvector_columnsnames((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols)
+    dest_val = StructVector((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols)
     for (i, el) in enumerate(val)
         for (k, v) in el
             ismissing(columns(dest_val, S(k))[i]) || error("Repeated values with same label are not allowed")

From 2b0d7d97f8506ad86c9758c1ff70f2ce93e07544 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Mon, 17 Dec 2018 19:23:58 +0000
Subject: [PATCH 09/21] fix more columns: ndsparse.jl and reduce.jl

---
 src/ndsparse.jl | 12 ++++++------
 src/reduce.jl   |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/ndsparse.jl b/src/ndsparse.jl
index cd0d4467..44752251 100644
--- a/src/ndsparse.jl
+++ b/src/ndsparse.jl
@@ -88,7 +88,7 @@ function ndsparse(::Val{:serial}, ks::Tup, vs::Union{Tup, AbstractVector};
     elseif copy
         if agg !== nothing
             iter = GroupReduce(agg, I, d, Base.OneTo(length(I)))
-            I, d = collect_columns(iter).columns
+            I, d = collect_columns(iter) |> columns
             agg = nothing
         else
             I = Base.copy(I)
@@ -117,7 +117,7 @@ function ndsparse(x::Columns, y::AbstractVector; kwargs...)
 end
 
 ndsparse(c::Columns{<:Pair}; kwargs...) =
-    convert(NDSparse, c.columns.first, c.columns.second; kwargs...)
+    convert(NDSparse, columns(c).first, columns(c).second; kwargs...)
 
 # backwards compat
 NDSparse(idx::Columns, data; kwargs...) = ndsparse(idx, data; kwargs...)
@@ -267,7 +267,7 @@ function permutedims(t::NDSparse, p::AbstractVector)
         throw(ArgumentError("argument to permutedims must be a valid permutation"))
     end
     flush!(t)
-    NDSparse(Columns(t.index.columns[p]), t.data, copy=true)
+    NDSparse(Columns(columns(t.index)[p]), t.data, copy=true)
 end
 
 # showing
@@ -312,7 +312,7 @@ function showmeta(io, t::NDSparse, cnames)
 end
 
 @noinline convert(::Type{NDSparse}, @nospecialize(ks), @nospecialize(vs); kwargs...) = ndsparse(ks, vs; kwargs...)
-@noinline convert(T::Type{NDSparse}, c::Columns{<:Pair}; kwargs...) = convert(T, c.columns.first, c.columns.second; kwargs...)
+@noinline convert(T::Type{NDSparse}, c::Columns{<:Pair}; kwargs...) = convert(T, columns(c).first, columns(c).second; kwargs...)
 
 # map and convert
 
@@ -344,9 +344,9 @@ end
 # Given an NDSparse array with multiple data columns (its data vector is a `Columns` object), return a
 # new array with the specified subset of data columns. Data is shared with the original array.
 # """
-# columns(x::NDSparse, which...) = NDSparse(x.index, Columns(x.data.columns[[which...]]), presorted=true)
+# columns(x::NDSparse, which...) = NDSparse(x.index, Columns(columns(x.data)[[which...]]), presorted=true)
 
-#columns(x::NDSparse, which) = NDSparse(x.index, x.data.columns[which], presorted=true)
+#columns(x::NDSparse, which) = NDSparse(x.index, columns(x.data)[which], presorted=true)
 
 #column(x::NDSparse, which) = columns(x, which)
 
diff --git a/src/reduce.jl b/src/reduce.jl
index d1675e6a..bfd303a4 100644
--- a/src/reduce.jl
+++ b/src/reduce.jl
@@ -342,7 +342,7 @@ function Base.reduce(f, x::NDSparse; kws...)
         if dims isa Symbol
             dims = [dims]
         end
-        keep = setdiff([1:ndims(x);], map(d->fieldindex(x.index.columns,d), dims))
+        keep = setdiff([1:ndims(x);], map(d->fieldindex(columns(x.index),d), dims))
         if isempty(keep)
             throw(ArgumentError("to remove all dimensions, use `reduce(f, A)`"))
         end
@@ -363,11 +363,11 @@ Like `reduce`, except uses a function mapping a vector of values to a scalar ins
 of a 2-argument scalar function.
 """
 function reducedim_vec(f, x::NDSparse, dims; with=valuenames(x))
-    keep = setdiff([1:ndims(x);], map(d->fieldindex(x.index.columns,d), dims))
+    keep = setdiff([1:ndims(x);], map(d->fieldindex(columns(x.index),d), dims))
     if isempty(keep)
         throw(ArgumentError("to remove all dimensions, use `reduce(f, A)`"))
     end
-    idxs, d = collect_columns(GroupBy(f, keys(x, (keep...,)), rows(x, with), sortpermby(x, (keep...,)))).columns
+    idxs, d = collect_columns(GroupBy(f, keys(x, (keep...,)), rows(x, with), sortpermby(x, (keep...,)))) |> columns
     NDSparse(idxs, d, presorted=true, copy=false)
 end
 

From 9e549518c0c36663517df59a31c93328ee060045 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Mon, 17 Dec 2018 20:11:53 +0000
Subject: [PATCH 10/21] keep naming consistent at Columns

---
 src/columns.jl | 48 ++++++++++++++++++++++++------------------------
 src/reshape.jl |  4 ++--
 src/utils.jl   |  8 ++++----
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/columns.jl b/src/columns.jl
index 1078cacc..0361d081 100644
--- a/src/columns.jl
+++ b/src/columns.jl
@@ -1,7 +1,7 @@
 # to get rid of eventually
 const Columns = StructVector
 # There is a StackOverflow bug in this case in Base.unaliascopy
-Base.copy(c::StructVector{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c
+Base.copy(c::Columns{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c
 
 # IndexedTable-like API
 
@@ -29,8 +29,8 @@ function colnames end
 Base.@pure colnames(t::AbstractVector) = (1,)
 columns(v::AbstractVector) = v
 
-Base.@pure colnames(t::StructVector) = fieldnames(eltype(t))
-Base.@pure colnames(t::StructVector{<:Pair}) = colnames(t.first) => colnames(t.second)
+Base.@pure colnames(t::Columns) = fieldnames(eltype(t))
+Base.@pure colnames(t::Columns{<:Pair}) = colnames(t.first) => colnames(t.second)
 
 """
     columns(itr, select::Selection = All())
@@ -53,9 +53,9 @@ available selection options and syntax.
 """
 function columns end
 
-columns(c::StructVector{<:Tuple}) = Tuple(fieldarrays(c))
-columns(c::StructVector{<:NamedTuple}) = fieldarrays(c)
-columns(c::StructVector{<:Pair}) = c.first => c.second
+columns(c::Columns{<:Tuple}) = Tuple(fieldarrays(c))
+columns(c::Columns{<:NamedTuple}) = fieldarrays(c)
+columns(c::Columns{<:Pair}) = c.first => c.second
 
 """
     ncols(itr)
@@ -68,19 +68,19 @@ Returns the number of columns in `itr`.
     ncols(rows(([1,2,3],[4,5,6]))) == 2
 """
 function ncols end
-ncols(c::StructVector{T, C}) where {T, C} = fieldcount(C)
-ncols(c::StructVector{<:Pair}) = ncols(c.first) => ncols(c.second)
+ncols(c::Columns{T, C}) where {T, C} = fieldcount(C)
+ncols(c::Columns{<:Pair}) = ncols(c.first) => ncols(c.second)
 ncols(c::AbstractArray) = 1
 
-summary(c::StructVector{D}) where {D<:Tuple} = "$(length(c))-element StructVector{$D}"
+summary(c::Columns{D}) where {D<:Tuple} = "$(length(c))-element Columns{$D}"
 
-_sizehint!(c::StructVector, n::Integer) = (foreachfield(x->_sizehint!(x,n), c); c)
+_sizehint!(c::Columns, n::Integer) = (foreachfield(x->_sizehint!(x,n), c); c)
 
-function _strip_pair(c::StructVector{<:Pair})
+function _strip_pair(c::Columns{<:Pair})
     f, s = map(columns, fieldarrays(c))
     (f isa AbstractVector) && (f = (f,))
     (s isa AbstractVector) && (s = (s,))
-    StructVector((f..., s...))
+    Columns((f..., s...))
 end
 
 # fused indexing operations
@@ -96,13 +96,13 @@ end
 
 # row operations
 
-copyrow!(I::StructVector, i, src) = foreachfield(c->copyelt!(c, i, src), I)
-copyrow!(I::StructVector, i, src::StructVector, j) = foreachfield((c1,c2)->copyelt!(c1, i, c2, j), I, src)
+copyrow!(I::Columns, i, src) = foreachfield(c->copyelt!(c, i, src), I)
+copyrow!(I::Columns, i, src::Columns, j) = foreachfield((c1,c2)->copyelt!(c1, i, c2, j), I, src)
 copyrow!(I::AbstractArray, i, src::AbstractArray, j) = (@inbounds I[i] = src[j])
-pushrow!(to::StructVector, from::StructVector, i) = foreachfield((a,b)->push!(a, b[i]), to, from)
+pushrow!(to::Columns, from::Columns, i) = foreachfield((a,b)->push!(a, b[i]), to, from)
 pushrow!(to::AbstractArray, from::AbstractArray, i) = push!(to, from[i])
 
-@generated function rowless(c::StructVector{D,C}, i, j) where {D,C}
+@generated function rowless(c::Columns{D,C}, i, j) where {D,C}
     N = fieldcount(C)
     ex = :(cmpelts(getfield(fieldarrays(c),$N), i, j) < 0)
     for n in N-1:-1:1
@@ -115,7 +115,7 @@ pushrow!(to::AbstractArray, from::AbstractArray, i) = push!(to, from[i])
     ex
 end
 
-@generated function roweq(c::StructVector{D,C}, i, j) where {D,C}
+@generated function roweq(c::Columns{D,C}, i, j) where {D,C}
     N = fieldcount(C)
     ex = :(cmpelts(getfield(fieldarrays(c),1), i, j) == 0)
     for n in 2:N
@@ -128,7 +128,7 @@ end
 
 # uses number of columns from `d`, assuming `c` has more or equal
 # dimensions, for broadcast joins.
-@generated function rowcmp(c::StructVector, i, d::StructVector{D}, j) where D
+@generated function rowcmp(c::Columns, i, d::Columns{D}, j) where D
     N = fieldcount(D)
     ex = :(cmp(getfield(fieldarrays(c),$N)[i], getfield(fieldarrays(d),$N)[j]))
     for n in N-1:-1:1
@@ -149,7 +149,7 @@ end
 # all columns are equal except left >= right in last column.
 # Could be generalized to some number of trailing columns, but I don't
 # know whether that has applications.
-@generated function row_asof(c::StructVector{D,C}, i, d::StructVector{D,C}, j) where {D,C}
+@generated function row_asof(c::Columns{D,C}, i, d::Columns{D,C}, j) where {D,C}
     N = length(C.parameters)
     if N == 1
         ex = :(!isless(getfield(fieldarrays(c),1)[i], getfield(fieldarrays(d),1)[j]))
@@ -319,7 +319,7 @@ end
 column(c, x) = columns(c)[colindex(c, x)]
 
 # optimized method
-@inline function column(c::StructVector, x::Union{Int, Symbol})
+@inline function column(c::Columns, x::Union{Int, Symbol})
     getfield(fieldarrays(c), x)
 end
 
@@ -391,11 +391,11 @@ the [`select`](@ref) function for selection options and syntax.
 function rows end
 
 rows(x::AbstractVector) = x
-rows(cols::Tup) = StructVector(cols)
+rows(cols::Tup) = Columns(cols)
 
 rows(t, which...) = rows(columns(t, which...))
 
-_cols_tuple(xs::StructVector) = columns(xs)
+_cols_tuple(xs::Columns) = columns(xs)
 _cols_tuple(xs::AbstractArray) = (xs,)
 concat_cols(xs, ys) = rows(concat_tup(_cols_tuple(xs), _cols_tuple(ys)))
 
@@ -425,7 +425,7 @@ function ColDict(t; copy=nothing)
     ColDict(Int[], t, convert(Array{Any}, collect(cnames)), Any[columns(t)...], copy)
 end
 
-function Base.getindex(d::ColDict{<:StructVector})
+function Base.getindex(d::ColDict{<:Columns})
     Columns(d.columns...; names=d.names)
 end
 
@@ -757,4 +757,4 @@ end
 
 ### utils
 
-compact_mem(x::StructVector) = StructVector(map(compact_mem, columns(x)))
+compact_mem(x::Columns) = Columns(map(compact_mem, columns(x)))
diff --git a/src/reshape.jl b/src/reshape.jl
index 7960c4aa..aa540c10 100644
--- a/src/reshape.jl
+++ b/src/reshape.jl
@@ -26,7 +26,7 @@ function stack(t::D, by = pkeynames(t); select = isa(t, NDSparse) ? valuenames(t
     labelcol = [label for i in 1:length(t) for label in labels]
 
     bycols = map(arg -> repeat(arg, inner = length(valuecols)), columns(t, by))
-    convert(collectiontype(D), StructVector(bycols), StructVector(labelcol, valuecol, names = [variable, value]))
+    convert(collectiontype(D), Columns(bycols), Columns(labelcol, valuecol, names = [variable, value]))
 end
 
 """
@@ -53,7 +53,7 @@ function unstack(t::D, by = pkeynames(t); variable = :variable, value = :value)
 end
 
 function unstack(::Type{D}, ::Type{T}, key, val, cols::AbstractVector{S}) where {D <:Dataset, T, S}
-    dest_val = StructVector((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols)
+    dest_val = Columns((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols)
     for (i, el) in enumerate(val)
         for (k, v) in el
             ismissing(columns(dest_val, S(k))[i]) || error("Repeated values with same label are not allowed")
diff --git a/src/utils.jl b/src/utils.jl
index 08a22afe..ee1ecb39 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -168,18 +168,18 @@ Base.@pure function arrayof(S)
     if T == Union{}
         Vector{Union{}}
     elseif T<:Tuple
-        StructVector{T, staticschema(Tuple{map(arrayof, fieldtypes(T))...})}
+        Columns{T, staticschema(Tuple{map(arrayof, fieldtypes(T))...})}
     elseif T<:NamedTuple
         if fieldcount(T) == 0
-            StructVector{NamedTuple{(), Tuple{}}, NamedTuple{(), Tuple{}}}
+            Columns{NamedTuple{(), Tuple{}}, NamedTuple{(), Tuple{}}}
         else
-            StructVector{T,NamedTuple{fieldnames(T), Tuple{map(arrayof, fieldtypes(T))...}}}
+            Columns{T,NamedTuple{fieldnames(T), Tuple{map(arrayof, fieldtypes(T))...}}}
         end
     elseif (T<:Union{Missing,String,WeakRefString} && Missing<:T) ||
         T<:Union{String, WeakRefString}
         StringArray{T, 1}
     elseif T<:Pair
-        StructVector{T, NamedTuple{(:first, :second), Tuple{map(arrayof, T.parameters)...}}}
+        Columns{T, NamedTuple{(:first, :second), Tuple{map(arrayof, T.parameters)...}}}
     else
         Vector{T}
     end

From 9aa4899bacbc48100dc89362511bbce305c85536 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Mon, 17 Dec 2018 20:23:56 +0000
Subject: [PATCH 11/21] import refine_perm

---
 src/IndexedTables.jl |  3 ++-
 src/columns.jl       |  4 ++--
 src/indexing.jl      | 20 ++++++++++----------
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/IndexedTables.jl b/src/IndexedTables.jl
index 29ce44c6..f0025a70 100644
--- a/src/IndexedTables.jl
+++ b/src/IndexedTables.jl
@@ -14,7 +14,8 @@ import Base:
     tuple_type_cons, tuple_type_head, tuple_type_tail, in, convert
 
 
-using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema
+using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema,
+                    refine_perm!
 
 #-----------------------------------------------------------------------# exports
 export 
diff --git a/src/columns.jl b/src/columns.jl
index 0361d081..0d2d1c03 100644
--- a/src/columns.jl
+++ b/src/columns.jl
@@ -180,11 +180,11 @@ elementwise. Collect output as `Columns` if `f` returns
     map_rows(i -> (exp = exp(i), log = log(i)), 1:5)
 """
 function map_rows(f, iters...)
-    collect_structarray(f(i...) for i in zip(iters...))
+    collect_columns(f(i...) for i in zip(iters...))
 end
 
 # 1-arg case
-map_rows(f, iter) = collect_structarray(f(i) for i in iter)
+map_rows(f, iter) = collect_columns(f(i) for i in iter)
 
 ## Special selectors to simplify column selector
 
diff --git a/src/indexing.jl b/src/indexing.jl
index 4a808cb0..e22d8ed4 100644
--- a/src/indexing.jl
+++ b/src/indexing.jl
@@ -42,19 +42,19 @@ isconstrange(col::AbstractVector{T}, idx::T) where {T} = true
 isconstrange(col, idx::AbstractArray) = isequal(first(idx), last(idx))
 
 function range_estimate(I::Columns, idxs)
-    r = range_estimate(I.columns[1], idxs[1])
+    r = range_estimate(columns(I)[1], idxs[1])
     i = 1; n = length(idxs)
-    while i < n && isconstrange(I.columns[i], idxs[i])
+    while i < n && isconstrange(columns(I)[i], idxs[i])
         i += 1
-        r = intersect(r, range_estimate(I.columns[i], idxs[i], first(r), last(r)))
+        r = intersect(r, range_estimate(columns(I)[i], idxs[i], first(r), last(r)))
     end
     return r
 end
 
 function _getindex(t::NDSparse, idxs)
     I = t.index
-    cs = astuple(I.columns)
-    if fieldcount(typeof(idxs)) !== fieldcount(typeof(I.columns))
+    cs = astuple(columns(I))
+    if fieldcount(typeof(idxs)) !== fieldcount(typeof(columns(I)))
         error("wrong number of indices")
     end
     for idx in idxs
@@ -63,7 +63,7 @@ function _getindex(t::NDSparse, idxs)
     out = convert(Vector{Int32}, range_estimate(I, idxs))
     filter!(i->row_in(cs, i, idxs), out)
     keepdims = filter(i->eltype(columns(t.index)[i]) != typeof(idxs[i]), 1:length(idxs))
-    NDSparse(Columns(map(x->x[out], getsubfields(I.columns, keepdims))), t.data[out], presorted=true)
+    NDSparse(Columns(map(x->x[out], getsubfields(columns(I), keepdims))), t.data[out], presorted=true)
 end
 
 # iterators over indices - lazy getindex
@@ -76,7 +76,7 @@ same index arguments as `getindex`.
 """
 function where(d::NDSparse, idxs::Vararg{Any,N}) where N
     I = d.index
-    cs = astuple(I.columns)
+    cs = astuple(columns(I))
     data = d.data
     rng = range_estimate(I, idxs)
     (data[i] for i in Iterators.Filter(r->row_in(cs, r, idxs), rng))
@@ -90,7 +90,7 @@ indices.
 """
 function update!(f::Union{Function,Type}, d::NDSparse, idxs::Vararg{Any,N}) where N
     I = d.index
-    cs = astuple(I.columns)
+    cs = astuple(columns(I))
     data = d.data
     rng = range_estimate(I, idxs)
     for r in rng
@@ -111,7 +111,7 @@ Similar to `where`, but returns an iterator giving `index=>value` pairs.
 """
 function pairs(d::NDSparse, idxs::Vararg{Any,N}) where N
     I = d.index
-    cs = astuple(I.columns)
+    cs = astuple(columns(I))
     data = d.data
     rng = range_estimate(I, idxs)
     (I[i]=>data[i] for i in Compat.Iterators.Filter(r->row_in(cs, r, idxs), rng))
@@ -190,7 +190,7 @@ function _setindex!(d::NDSparse{T,D}, rhs, idxs) where {T,D}
     end
     flush!(d)
     I = d.index
-    cs = astuple(I.columns)
+    cs = astuple(columns(I))
     data = d.data
     rng = range_estimate(I, idxs)
     for r in rng

From 197de5637e9ffcc9fb76f3a47966d535370ccaff Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Mon, 17 Dec 2018 21:56:47 +0000
Subject: [PATCH 12/21] avoid multi argument Columns

---
 src/columns.jl    |  2 +-
 src/ndsparse.jl   |  2 +-
 src/reshape.jl    |  4 ++--
 test/test_core.jl | 22 +++++++++++-----------
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/columns.jl b/src/columns.jl
index 0d2d1c03..cef4071c 100644
--- a/src/columns.jl
+++ b/src/columns.jl
@@ -426,7 +426,7 @@ function ColDict(t; copy=nothing)
 end
 
 function Base.getindex(d::ColDict{<:Columns})
-    Columns(d.columns...; names=d.names)
+    Columns(Tuple(d.columns); names=d.names)
 end
 
 Base.getindex(d::ColDict, key) = rows(d[], key)
diff --git a/src/ndsparse.jl b/src/ndsparse.jl
index 44752251..c72a36bb 100644
--- a/src/ndsparse.jl
+++ b/src/ndsparse.jl
@@ -177,7 +177,7 @@ Construct an NDSparse array from columns. The last argument is the data column,
 """
 function NDSparse(columns...; names=nothing, rest...)
     keys, data = columns[1:end-1], columns[end]
-    ndsparse(Columns(keys..., names=names), data; rest...)
+    ndsparse(Columns(keys, names=names), data; rest...)
 end
 
 similar(t::NDSparse) = NDSparse(similar(t.index, 0), similar(t.data, 0))
diff --git a/src/reshape.jl b/src/reshape.jl
index aa540c10..b89257b2 100644
--- a/src/reshape.jl
+++ b/src/reshape.jl
@@ -26,7 +26,7 @@ function stack(t::D, by = pkeynames(t); select = isa(t, NDSparse) ? valuenames(t
     labelcol = [label for i in 1:length(t) for label in labels]
 
     bycols = map(arg -> repeat(arg, inner = length(valuecols)), columns(t, by))
-    convert(collectiontype(D), Columns(bycols), Columns(labelcol, valuecol, names = [variable, value]))
+    convert(collectiontype(D), Columns(bycols), Columns((labelcol, valuecol), names = [variable, value]))
 end
 
 """
@@ -53,7 +53,7 @@ function unstack(t::D, by = pkeynames(t); variable = :variable, value = :value)
 end
 
 function unstack(::Type{D}, ::Type{T}, key, val, cols::AbstractVector{S}) where {D <:Dataset, T, S}
-    dest_val = Columns((Array{Union{T, Missing}}(undef, length(val)) for i in cols)...; names = cols)
+    dest_val = Columns(Tuple(Array{Union{T, Missing}}(undef, length(val)) for i in cols); names = cols)
     for (i, el) in enumerate(val)
         for (k, v) in el
             ismissing(columns(dest_val, S(k))[i]) || error("Repeated values with same label are not allowed")
diff --git a/test/test_core.jl b/test/test_core.jl
index b91ccf5d..ffd60df2 100644
--- a/test/test_core.jl
+++ b/test/test_core.jl
@@ -1,14 +1,14 @@
 
 
-    c = Columns([1,1,1,2,2], [1,2,4,3,5])
-    d = Columns([1,1,2,2,2], [1,3,1,4,5])
-    e = Columns([1,1,1], sort([rand(),0.5,rand()]))
-    f = Columns([1,1,1], sort([rand(),0.5,rand()]))
-    @test map(+,NDSparse(c,ones(5)),NDSparse(d,ones(5))).index == Columns([1,2],[1,5])
+    c = Columns(([1,1,1,2,2], [1,2,4,3,5]))
+    d = Columns(([1,1,2,2,2], [1,3,1,4,5]))
+    e = Columns(([1,1,1], sort([rand(),0.5,rand()])))
+    f = Columns(([1,1,1], sort([rand(),0.5,rand()])))
+    @test map(+,NDSparse(c,ones(5)),NDSparse(d,ones(5))).index == Columns(([1,2],[1,5]))
     @test length(map(+,NDSparse(e,ones(3)),NDSparse(f,ones(3)))) == 1
     @test eltype(c) == Tuple{Int,Int}
     @test map_rows(i -> (exp = exp(i), log = log(i)), 1:5) == Columns((exp = exp.(1:5), log = log.(1:5)))
-    @test map_rows(tuple, 1:3, ["a","b","c"]) == Columns([1,2,3], ["a","b","c"])
+    @test map_rows(tuple, 1:3, ["a","b","c"]) == Columns(([1,2,3], ["a","b","c"]))
 
  c = Columns(Columns((a=[1,2,3],)) => Columns((b=["a","b","c"],)))
     @test columns(c).first == Columns((a=[1,2,3],))
@@ -33,7 +33,7 @@
     empty!(d)
     @test d == c[Int[]]
     @test c != Columns((a=[1,2,3], b=["a","b","c"]))
-    x = Columns([1], [1.0], WeakRefStrings.StringArray(["a"]))
+    x = Columns(([1], [1.0], WeakRefStrings.StringArray(["a"])))
     @test IndexedTables.arrayof(eltype(x)) == typeof(x)
     @test IndexedTables.arrayof(WeakRefString{UInt8}) == WeakRefStrings.StringArray{WeakRefString{UInt8},1}
     @test typeof(similar(c, 10)) == typeof(similar(typeof(c), 10)) == typeof(c)
@@ -42,10 +42,10 @@
     @test sortperm(c) == [1,2,3]
     permute!(c, [2,3, 1])
     @test c == Columns(Columns((a=[2,3,1],)) => Columns((b=["b","c","a"],)))
-    f = Columns(Columns([1, 1, 2, 2]) => ["b", "a", "c", "d"])
-    @test IndexedTables._strip_pair(f) == Columns([1, 1, 2, 2], ["b", "a", "c", "d"])
+    f = Columns(Columns(([1, 1, 2, 2],)) => ["b", "a", "c", "d"])
+    @test IndexedTables._strip_pair(f) == Columns(([1, 1, 2, 2], ["b", "a", "c", "d"]))
     @test sortperm(f) == [2, 1, 3, 4]
-    @test sort(f) == Columns(Columns([1, 1, 2, 2]) => ["a", "b", "c", "d"])
+    @test sort(f) == Columns(Columns(([1, 1, 2, 2],)) => ["a", "b", "c", "d"])
     @test !issorted(f)
 #end
 
@@ -93,7 +93,7 @@ end
     # Tuple output
     b1 = broadcast((x,y)->(x.a, y.c), t, t1)
     @test isa(b1.data, Columns)
-    @test b1 == NDSparse(idx, Columns([5,6], [4,5]))
+    @test b1 == NDSparse(idx, Columns(([5,6], [4,5])))
 
     b2 = broadcast((x,y)->(m=x.a, n=y.c), t, t1)
     @test b2 == NDSparse(idx, Columns(m=[5,6], n=[4,5]))

From 9259f6ab6d4e4da5ca5c2c3e6748af6aae8a336a Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Mon, 17 Dec 2018 22:18:07 +0000
Subject: [PATCH 13/21] fixed ndsparse core tests

---
 src/indexedtable.jl |  2 +-
 src/ndsparse.jl     |  2 +-
 test/test_core.jl   | 44 ++++++++++++++++++++++----------------------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/indexedtable.jl b/src/indexedtable.jl
index 0c7efede..016c27b3 100644
--- a/src/indexedtable.jl
+++ b/src/indexedtable.jl
@@ -312,7 +312,7 @@ a default key of tuples `(1,):(n,)` is generated.
 """
 function pkeys(t::IndexedTable)
     if isempty(t.pkey)
-        Columns(Base.OneTo(length(t)))
+        Columns((Base.OneTo(length(t)),))
     else
         rows(t, pkeynames(t))
     end
diff --git a/src/ndsparse.jl b/src/ndsparse.jl
index c72a36bb..063fbc75 100644
--- a/src/ndsparse.jl
+++ b/src/ndsparse.jl
@@ -383,7 +383,7 @@ function convert(::Type{NDSparse}, a::AbstractArray{T}) where T
         end
         i += 1
     end
-    NDSparse(Columns(reverse(idxs)...), data, presorted=true)
+    NDSparse(Columns(Tuple(Iterators.reverse(idxs))), data, presorted=true)
 end
 
 # aggregation
diff --git a/test/test_core.jl b/test/test_core.jl
index ffd60df2..52643747 100644
--- a/test/test_core.jl
+++ b/test/test_core.jl
@@ -213,7 +213,7 @@ x = NDSparse(Columns(x = [1,2,3], y = [4,5,6], z = [7,8,9]), [10,11,12])
 
 # test showing
 
-@test repr(ndsparse(Columns([1]), Columns(()))) == """
+@test repr(ndsparse(Columns(([1],)), Columns(()))) == """
 1-d NDSparse with 1 values (0-tuples):
 1 │
 ──┼
@@ -284,7 +284,7 @@ function foo(n, data=ones(Int, 1))
     NDSparse(Columns(t([ones(Int, 1) for i=1:n]...)), data)
 end
 
-let x = Columns([6,5,4,3,2,2,1],[4,4,4,4,4,4,4],[1,2,3,4,5,6,7])
+let x = Columns(([6,5,4,3,2,2,1],[4,4,4,4,4,4,4],[1,2,3,4,5,6,7]))
     @test issorted(x[sortperm(x)])
 end
 
@@ -300,7 +300,7 @@ let hitemps = NDSparse([fill("New York",3); fill("Boston",3)],
                                                      [91,76])
 end
 
-    cs = Columns([1], [2])
+    cs = Columns(([1], [2]))
     t = table(cs)
     @test t.pkey == Int[]
     @test t.columns == [(1,2)]
@@ -309,22 +309,22 @@ end
     @test column(t.columns,1) === columns(cs)[1]
     t = table(cs, copy=false, pkey=[1])
     @test column(t.columns,1) === columns(cs)[1]
-    cs = Columns([2, 1], [3,4])
+    cs = Columns(([2, 1], [3,4]))
     t = table(cs, copy=false, pkey=[1])
     @test t.pkey == Int[1]
-    cs = Columns([2, 1], [3,4])
+    cs = Columns(([2, 1], [3,4]))
     t = table(cs, copy=false, pkey=[1])
     @test column(t.columns,1) === columns(cs)[1]
     @test t.pkey == Int[1]
     @test t.columns == [(1,4), (2,3)]
 
-    cs = Columns(x=[2, 1], y=[3,4])
+    cs = Columns((x=[2, 1], y=[3,4]))
     t = table(cs, copy=false, pkey=:x)
     @test column(t.columns,1) === columns(cs).x
     @test t.pkey == Int[1]
     @test t.columns == [(x=1,y=4), (x=2,y=3)]
 
-    cs = Columns([2, 1], [3,4])
+    cs = Columns(([2, 1], [3,4]))
     t = table(cs, presorted=true, pkey=[1])
     @test t.pkey == Int[1]
     @test t.columns == [(2,3), (1,4)]
@@ -333,7 +333,7 @@ end
     b = table([1, 2, 3], [4, 5, 6], names=[:x, :y])
     @test table(([1, 2, 3], [4, 5, 6])) == a
     @test table((x = [1, 2, 3], y = [4, 5, 6])) == b
-    @test table(Columns([1, 2, 3], [4, 5, 6])) == a
+    @test table(Columns(([1, 2, 3], [4, 5, 6]))) == a
     @test table(Columns(x=[1, 2, 3], y=[4, 5, 6])) == b
     @test b == table(b)
     b = table([2, 3, 1], [4, 5, 6], names=[:x, :y], pkey=:x)
@@ -355,15 +355,15 @@ end
     @test excludecols([1, 2, 3], (1,)) == ()
     @test convert(IndexedTable, Columns(x=[1, 2], y=[3, 4]), Columns(z=[1, 2]), presorted=true) == table([1, 2], [3, 4], [1, 2], names=Symbol[:x, :y, :z])
     @test colnames([1, 2, 3]) == (1,)
-    @test colnames(Columns([1, 2, 3], [3, 4, 5])) == (1, 2)
+    @test colnames(Columns(([1, 2, 3], [3, 4, 5]))) == (1, 2)
     @test colnames(table([1, 2, 3], [3, 4, 5])) == (1, 2)
     @test colnames(Columns(x=[1, 2, 3], y=[3, 4, 5])) == (:x, :y)
     @test colnames(table([1, 2, 3], [3, 4, 5], names=[:x, :y])) == (:x, :y)
     @test colnames(ndsparse(Columns(x=[1, 2, 3]), Columns(y=[3, 4, 5]))) == (:x, :y)
     @test colnames(ndsparse(Columns(x=[1, 2, 3]), [3, 4, 5])) == (:x, 2)
     @test colnames(ndsparse(Columns(x=[1, 2, 3]), [3, 4, 5])) == (:x, 2)
-    @test colnames(ndsparse(Columns([1, 2, 3], [4, 5, 6]), Columns(x=[6, 7, 8]))) == (1, 2, :x)
-    @test colnames(ndsparse(Columns(x=[1, 2, 3]), Columns([3, 4, 5], [6, 7, 8]))) == (:x, 2, 3)
+    @test colnames(ndsparse(Columns(([1, 2, 3], [4, 5, 6])), Columns(x=[6, 7, 8]))) == (1, 2, :x)
+    @test colnames(ndsparse(Columns(x=[1, 2, 3]), Columns(([3, 4, 5], [6, 7, 8])))) == (:x, 2, 3)
 
     x = ndsparse(["a", "b"], [3, 4])
     @test (keytype(x), eltype(x)) == (Tuple{String}, Int)
@@ -381,9 +381,9 @@ end
     x = ndsparse([1, 2], [3, 4])
     @test pkeynames(x) == (1,)
 
-    a = Columns([1,2,1],["foo","bar","baz"])
-    b = Columns([2,1,1],["bar","baz","foo"])
-    c = Columns([1,1,2],["foo","baz","bar"])
+    a = Columns(([1,2,1],["foo","bar","baz"]))
+    b = Columns(([2,1,1],["bar","baz","foo"]))
+    c = Columns(([1,1,2],["foo","baz","bar"]))
     @test a != b
     @test a != c
     @test b != c
@@ -404,16 +404,16 @@ end
     #78
     @test_throws ArgumentError map(x->throw(ArgumentError("x")), a)
     @inferred Columns((c=[1],))
-    @inferred Columns([1])
-    @test_throws ErrorException @inferred Columns(c=[1]) # bad
+    @inferred Columns(([1],))
+    @inferred Columns(c=[1])
     #@inferred NDSparse(Columns(c=[1]), [1])
     #@inferred NDSparse(Columns([1]), [1])
-    c = Columns([1,1,1,2,2], [1,2,4,3,5])
-    d = Columns([1,1,2,2,2], [1,3,1,4,5])
-    e = Columns([1,1,1], sort([rand(),0.5,rand()]))
-    f = Columns([1,1,1], sort([rand(),0.5,rand()]))
-    @test merge(NDSparse(c,ones(5)),NDSparse(d,ones(5))).index == Columns([1,1,1,1,2,2,2,2],[1,2,3,4,1,3,4,5])
-    @test eltype(merge(NDSparse(c,Columns(ones(Int, 5))),NDSparse(d,Columns(ones(Float64, 5)))).data) == Tuple{Float64}
+    c = Columns(([1,1,1,2,2], [1,2,4,3,5]))
+    d = Columns(([1,1,2,2,2], [1,3,1,4,5]))
+    e = Columns(([1,1,1], sort([rand(),0.5,rand()])))
+    f = Columns(([1,1,1], sort([rand(),0.5,rand()])))
+    @test merge(NDSparse(c,ones(5)),NDSparse(d,ones(5))).index == Columns(([1,1,1,1,2,2,2,2],[1,2,3,4,1,3,4,5]))
+    @test eltype(merge(NDSparse(c,Columns((ones(Int, 5),))),NDSparse(d,Columns((ones(Float64, 5),)))).data) == Tuple{Float64}
     @test eltype(merge(NDSparse(c,Columns(x=ones(Int, 5))),NDSparse(d,Columns(x=ones(Float64, 5)))).data) == typeof((x=0.,))
     @test length(merge(NDSparse(e,ones(3)),NDSparse(f,ones(3)))) == 5
     @test vcat(Columns(x=[1]), Columns(x=[1.0])) == Columns(x=[1,1.0])

From 9c3987fbccf608027c09d2b47d7dec63a04033ba Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Mon, 17 Dec 2018 23:10:56 +0000
Subject: [PATCH 14/21] fixed core tests

---
 src/indexedtable.jl |  2 +-
 src/reduce.jl       |  4 ++--
 test/test_core.jl   | 22 +++++++++++-----------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/indexedtable.jl b/src/indexedtable.jl
index 016c27b3..3f8f0be2 100644
--- a/src/indexedtable.jl
+++ b/src/indexedtable.jl
@@ -141,7 +141,7 @@ function table(cs::Tup; chunks=nothing, kwargs...)
 end
 
 table(cs::Columns; kwargs...) = table(columns(cs); kwargs...)
-table(c::Columns{<:Pair}; kwargs...) = convert(IndexedTable, c.columns.first, c.columns.second; kwargs...)
+table(c::Columns{<:Pair}; kwargs...) = convert(IndexedTable, columns(c).first, columns(c).second; kwargs...)
 
 function table(cols::AbstractArray...; names=nothing, kwargs...)
     if isa(names, AbstractArray) && all(x->isa(x, Symbol), names)
diff --git a/src/reduce.jl b/src/reduce.jl
index bfd303a4..b0c7baf9 100644
--- a/src/reduce.jl
+++ b/src/reduce.jl
@@ -251,14 +251,14 @@ init_func(ac::ApplyColwise{<:Tuple}, t::AbstractVector) =
     Tuple(Symbol(n) => f for (f, n) in zip(ac.functions, ac.names))
 function init_func(ac::ApplyColwise{<:Tuple}, t::Columns)
     if ac.stack
-        dd -> Columns(collect(colnames(t)), ([f(x) for x in columns(dd)] for f in ac.functions)...; names = vcat(ac.variable, ac.names))
+        dd -> Columns((collect(colnames(t)), ([f(x) for x in columns(dd)] for f in ac.functions)...); names = vcat(ac.variable, ac.names))
     else
         Tuple(Symbol(s, :_, n) => s => f for s in colnames(t), (f, n) in zip(ac.functions, ac.names))
     end
 end
 
 init_func(ac::ApplyColwise, t::Columns) =
-    ac.stack ? dd -> Columns(collect(colnames(t)), [ac.functions(x) for x in columns(dd)]; names = vcat(ac.variable, ac.names)) :
+    ac.stack ? dd -> Columns((collect(colnames(t)), [ac.functions(x) for x in columns(dd)]); names = vcat(ac.variable, ac.names)) :
         Tuple(s => s => ac.functions for s in colnames(t))
 init_func(ac::ApplyColwise, t::AbstractVector) = ac.functions
 
diff --git a/test/test_core.jl b/test/test_core.jl
index 52643747..68ea1106 100644
--- a/test/test_core.jl
+++ b/test/test_core.jl
@@ -679,11 +679,11 @@ end
     @test c[12,52] == 67
     @test c[32,34] == 160
     @test length(c.index) == 2
-    @test naturaljoin(a, b) == NDSparse([12,32], [52,34], Columns([11,150], [56,10]))
+    @test naturaljoin(a, b) == NDSparse([12,32], [52,34], Columns(([11,150], [56,10])))
 
-    c = NDSparse([12,32], [52,34], Columns([0,1], [2,3]))
-    @test naturaljoin(a, c) == NDSparse([12,32], [52,34], Columns([11,150], [0,1], [2,3]))
-    @test naturaljoin(c, a) == NDSparse([12,32], [52,34], Columns([0,1], [2,3], [11,150]))
+    c = NDSparse([12,32], [52,34], Columns(([0,1], [2,3])))
+    @test naturaljoin(a, c) == NDSparse([12,32], [52,34], Columns(([11,150], [0,1], [2,3])))
+    @test naturaljoin(c, a) == NDSparse([12,32], [52,34], Columns(([0,1], [2,3], [11,150])))
 
     @test isequal(
         leftjoin(t1, t2, lselect=2, rselect=2),
@@ -702,12 +702,12 @@ end
 
     @test isequal(leftjoin(NDSparse([1,1,1,2], [2,3,4,4], [5,6,7,8]),
                    NDSparse([1,1,3],   [2,4,4],   [9,10,12])),
-                  NDSparse([1,1,1,2], [2,3,4,4], Columns([5, 6, 7, 8], [9, missing, 10, missing])))
+                  NDSparse([1,1,1,2], [2,3,4,4], Columns(([5, 6, 7, 8], [9, missing, 10, missing]))))
 
     @test isequal(
                   leftjoin(NDSparse([1,1,1,2], [2,3,4,4], [5,6,7,8]),
                    NDSparse([1,1,2],   [2,4,4],   [9,10,12])),
-                  NDSparse([1,1,1,2], [2,3,4,4], Columns([5, 6, 7, 8], [9, missing, 10, 12])))
+                  NDSparse([1,1,1,2], [2,3,4,4], Columns(([5, 6, 7, 8], [9, missing, 10, 12]))))
 
 
     @test isequal(outerjoin(t1, t2, lselect=2, rselect=2), table([0,1,2,3,4,5], [missing, 5,6,7,8,missing], [5,missing,missing,6,7,8]))
@@ -1093,7 +1093,7 @@ end
     A = [1]
     # shouldn't mutate input
     mapslices(x, [:a]) do slice
-        NDSparse(Columns(A), A)
+        NDSparse(Columns((A,)), A)
     end
     @test A == [1]
 
@@ -1115,15 +1115,15 @@ end
     @test t==NDSparse(Columns(a_1=[1], a_2=[2], c=[2]), Columns(d=[1]))
 
     # signleton slices
-    x=NDSparse(Columns([1,2]),Columns([1,2]))
+    x=NDSparse(Columns(([1,2],)),Columns(([1,2],)))
     @test_throws ErrorException mapslices(x,()) do slice
         true
     end
     t = mapslices(x,()) do slice
-        @test slice == NDSparse(Columns([1]), Columns([1])) || slice == NDSparse(Columns([2]), Columns([2]))
-        NDSparse(Columns([1]), ([1]))
+        @test slice == NDSparse(Columns(([1],)), Columns(([1],))) || slice == NDSparse(Columns(([2],)), Columns(([2],)))
+        NDSparse(Columns(([1],)), ([1]))
     end
-    @test t == NDSparse(Columns([1,2], [1,1]), [1,1])
+    @test t == NDSparse(Columns(([1,2], [1,1])), [1,1])
 
     x = NDSparse([1,1,1,2,2,2,3,3],[1,2,3,4,5,6,7,8],rand(8));
     y = mapslices(t -> (1, 2), x, 2)

From f66e29033c8e7045dafade603d3cbd5a418ab7f4 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Tue, 18 Dec 2018 11:00:04 +0000
Subject: [PATCH 15/21] use old copy behavior

---
 src/indexedtable.jl | 4 ++--
 src/ndsparse.jl     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/indexedtable.jl b/src/indexedtable.jl
index 3f8f0be2..64e0ab7e 100644
--- a/src/indexedtable.jl
+++ b/src/indexedtable.jl
@@ -101,10 +101,10 @@ function table(::Val{:serial}, cols::Tup;
                 cs = permute!(cs, perm)
             end
         elseif copy
-            cs = Base.copy(cs)
+            cs = copyto!(similar(cs), cs)
         end
     elseif copy
-        cs = Base.copy(cs)
+        cs = copyto!(similar(cs), cs)
     end
 
     intpkey = map(k->colindex(cs, k), pkey)
diff --git a/src/ndsparse.jl b/src/ndsparse.jl
index 063fbc75..a83b77ee 100644
--- a/src/ndsparse.jl
+++ b/src/ndsparse.jl
@@ -91,8 +91,8 @@ function ndsparse(::Val{:serial}, ks::Tup, vs::Union{Tup, AbstractVector};
             I, d = collect_columns(iter) |> columns
             agg = nothing
         else
-            I = Base.copy(I)
-            d = Base.copy(d)
+            I = copyto!(similar(I), I)
+            d = copyto!(similar(d), d)
         end
     end
     stripnames(x) = isa(x, Columns) ? rows(astuple(columns(x))) : rows((x,))

From 621caae7a6095fae59b4873877e634420db523b8 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Tue, 18 Dec 2018 11:08:45 +0000
Subject: [PATCH 16/21] fix flatten

---
 src/flatten.jl | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/flatten.jl b/src/flatten.jl
index 9f75e762..4de17ecd 100644
--- a/src/flatten.jl
+++ b/src/flatten.jl
@@ -16,10 +16,10 @@ function dedup_names(ns)
 end
 
 function mapslices(f, x::NDSparse, dims; name = nothing)
-    iterdims = setdiff([1:ndims(x);], map(d->fieldindex(x.index.columns,d), dims))
-    idx = Any[Colon() for v in x.index.columns]
+    iterdims = setdiff([1:ndims(x);], map(d->fieldindex(columns(x.index),d), dims))
+    idx = Any[Colon() for v in columns(x.index)]
 
-    iter = Columns(astuple(x.index.columns)[[iterdims...]])
+    iter = Columns(astuple(columns(x.index))[[iterdims...]])
     if !isempty(dims) || !issorted(iter)
         iter = sort(iter)
     end
@@ -48,7 +48,7 @@ function mapslices(f, x::NDSparse, dims; name = nothing)
         for j=1:n
             @inbounds index_first[j] = iter[1]
         end
-        index = Columns(index_first.columns..., astuple(copy(y.index).columns)...; names=ns)
+        index = Columns((columns(index_first)..., astuple(columns(copy(y.index)))...); names=ns)
         data = copy(y.data)
         output = NDSparse(index, data)
         if isempty(dims)
@@ -61,7 +61,7 @@ function mapslices(f, x::NDSparse, dims; name = nothing)
         if !all(x->isa(x, Symbol), ns)
             ns = nothing
         end
-        index = Columns(iter[1:1].columns...; names=ns)
+        index = Columns(Tuple(columns(iter[1:1])); names=ns)
         if isa(y, Tup)
             vec = convert(Columns, [y])
         else
@@ -70,7 +70,7 @@ function mapslices(f, x::NDSparse, dims; name = nothing)
         if name === nothing
             output = NDSparse(index, vec)
         else
-            output = NDSparse(index, Columns(vec, names=[name]))
+            output = NDSparse(index, Columns(Tuple(columns(vec)), names=[name]))
         end
         if isempty(dims)
             error("calling mapslices with no dimensions and scalar return value -- use map instead")
@@ -81,7 +81,7 @@ function mapslices(f, x::NDSparse, dims; name = nothing)
 end
 
 function _mapslices_scalar!(f, output, x, iter, iterdims, start, coerce)
-    idx = Any[Colon() for v in x.index.columns]
+    idx = Any[Colon() for v in columns(x.index)]
 
     for i = start:length(iter)
         if i != 1 && roweq(iter, i-1, i) # We've already visited this slice
@@ -105,15 +105,15 @@ function _mapslices_itable_singleton!(f, output, x, start)
     I = output.index
     D = output.data
 
-    I1 = Columns(I.columns[1:ndims(x)])
-    I2 = Columns(I.columns[ndims(x)+1:end])
+    I1 = Columns(columns(I)[1:ndims(x)])
+    I2 = Columns(columns(I)[ndims(x)+1:end])
     i = start
     for i in start:length(x)
         k = x.index[i]
         y = f(NDSparse(x.index[i:i], x.data[i:i]))
         n = length(y)
 
-        foreach((x,y)->append_n!(x,y,n), I1.columns, k)
+        foreach((x,y)->append_n!(x,y,n), columns(I1), k)
         append!(I2, y.index)
         append!(D, y.data)
     end
@@ -121,13 +121,13 @@ function _mapslices_itable_singleton!(f, output, x, start)
 end
 
 function _mapslices_itable!(f, output, x, iter, iterdims, start)
-    idx = Any[Colon() for v in x.index.columns]
+    idx = Any[Colon() for v in columns(x.index)]
     I = output.index
     D = output.data
     initdims = length(iterdims)
 
-    I1 = Columns(getsubfields(I.columns, 1:initdims)) # filled from existing table
-    I2 = Columns(getsubfields(I.columns, initdims+1:fieldcount(typeof(I.columns)))) # filled from output tables
+    I1 = Columns(getsubfields(columns(I), 1:initdims)) # filled from existing table
+    I2 = Columns(getsubfields(columns(I), initdims+1:fieldcount(typeof(columns(I))))) # filled from output tables
 
     for i = start:length(iter)
         if i != 1 && roweq(iter, i-1, i) # We've already visited this slice
@@ -144,7 +144,7 @@ function _mapslices_itable!(f, output, x, iter, iterdims, start)
         y = f(subtable)
         n = length(y)
 
-        foreach((x,y)->append_n!(x,y,n), I1.columns, iter[i])
+        foreach((x,y)->append_n!(x,y,n), columns(I1), iter[i])
         append!(I2, y.index)
         append!(D, y.data)
     end

From 9a85be88bdf2bd8ed6c210597c653c4f4b7babd1 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Tue, 18 Dec 2018 11:15:51 +0000
Subject: [PATCH 17/21] fix all tests

---
 src/collect.jl       |  6 +++---
 test/test_collect.jl |  4 ++--
 test/test_utils.jl   | 12 ++++++------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/collect.jl b/src/collect.jl
index 47166287..e856d9dc 100644
--- a/src/collect.jl
+++ b/src/collect.jl
@@ -98,7 +98,7 @@ function collect_columns_flattened(itr, el::Pair, st)
 end
 
 function collect_columns_flattened!(dest::Columns{<:Pair}, itr, el::Pair, st)
-    dest_key, dest_data = dest.columns
+    dest_key, dest_data = columns(dest)
     while true
         elem = iterate(itr, st)
         elem === nothing && break
@@ -178,7 +178,7 @@ function widencolumns(dest, i, el::S, ::Type{T}) where{S, T}
 end
 
 function widencolumns(dest::Columns{<:Pair}, i, el::Pair, ::Type{Pair{T1, T2}}) where{T1, T2}
-    dest1 = fieldwise_isa(el.first, T1) ? dest.columns.first : widencolumns(dest.columns.first, i, el.first, T1)
-    dest2 = fieldwise_isa(el.second, T2) ? dest.columns.second : widencolumns(dest.columns.second, i, el.second, T2)
+    dest1 = fieldwise_isa(el.first, T1) ? columns(dest).first : widencolumns(columns(dest).first, i, el.first, T1)
+    dest2 = fieldwise_isa(el.second, T2) ? columns(dest).second : widencolumns(columns(dest).second, i, el.second, T2)
     Columns(dest1 => dest2)
 end
diff --git a/test/test_collect.jl b/test/test_collect.jl
index 610f54c3..c6a52b8b 100644
--- a/test/test_collect.jl
+++ b/test/test_collect.jl
@@ -63,11 +63,11 @@ end
     # empty
     itr = Iterators.filter(t -> t > 10, 1:8)
     tuple_itr = ((i+1, i-1) for i in itr)
-    @test collect_columns(tuple_itr) == Columns(Int[], Int[])
+    @test collect_columns(tuple_itr) == Columns((Int[], Int[]))
 
     itr = (i for i in 0:-1)
     tuple_itr = ((i+1, i-1) for i in itr)
-    @test collect_columns(tuple_itr) == Columns(Int[], Int[])
+    @test collect_columns(tuple_itr) == Columns((Int[], Int[]))
 end
 
 @testset "collectscalars" begin
diff --git a/test/test_utils.jl b/test/test_utils.jl
index 264758aa..9a68a859 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -13,9 +13,9 @@ let a = [1:10;]
     @test IndexedTables._sizehint!(a, 20) === a
 end
 
-@test Columns([1,2], [3,4]) == Columns([1,2], [3.0,4.0])
-@test Columns([1,2], [3,4]) != Columns([1,2], [3.0,4.1])
-@test Columns([1,2], [3,4]) != Columns(a=[1,2], b=[3,4])
+@test Columns(([1,2], [3,4])) == Columns(([1,2], [3.0,4.0]))
+@test Columns(([1,2], [3,4])) != Columns(([1,2], [3.0,4.1]))
+@test Columns(([1,2], [3,4])) != Columns((a=[1,2], b=[3,4]))
 
 function roundtrips(x)
     b = IOBuffer()
@@ -24,13 +24,13 @@ function roundtrips(x)
     return deserialize(b) == x
 end
 
-@test roundtrips(Columns(rand(5), rand(5)))
+@test roundtrips(Columns((rand(5), rand(5))))
 @test roundtrips(Columns(c1 = rand(5), c2 = rand(5)))
 @test roundtrips(convert(NDSparse, rand(3,3)))
 @test roundtrips(NDSparse(Columns(y=rand(3), x=rand(3)), rand(3)))
 
 let x = rand(3), y = rand(3), v = rand(3), w = rand(3)
-    @test vcat(Columns(x,y), Columns(v,w)) == Columns(vcat(x,v), vcat(y,w))
+    @test vcat(Columns((x,y)), Columns((v,w))) == Columns((vcat(x,v), vcat(y,w)))
     @test vcat(Columns(x=x,y=y), Columns(x=v,y=w)) == Columns(x=vcat(x,v), y=vcat(y,w))
 end
 
@@ -60,5 +60,5 @@ let
 
     # 97
     x = ndsparse((t=[0.01, 0.05],), (x=[1,2], y=[3,4],))
-    @test map(p->(r = sum(p),), x).data == Columns([4,6], names=[:r])
+    @test map(p->(r = sum(p),), x).data == Columns(([4,6],), names=[:r])
 end

From 33a617c9ecc9bcb86b9b0d44bb0b82ba38973691 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Tue, 18 Dec 2018 11:20:51 +0000
Subject: [PATCH 18/21] switch to structarray collection mechanism

---
 src/IndexedTables.jl |   4 +-
 src/collect.jl       | 121 ++-----------------------------------------
 test/test_collect.jl |  43 ++++++---------
 test/test_core.jl    |   2 +-
 4 files changed, 25 insertions(+), 145 deletions(-)

diff --git a/src/IndexedTables.jl b/src/IndexedTables.jl
index f0025a70..6e8960f8 100644
--- a/src/IndexedTables.jl
+++ b/src/IndexedTables.jl
@@ -14,8 +14,8 @@ import Base:
     tuple_type_cons, tuple_type_head, tuple_type_tail, in, convert
 
 
-using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema,
-                    refine_perm!
+using StructArrays: StructVector, StructArray, foreachfield, fieldarrays, collect_structarray, staticschema, ArrayInitializer,
+                    refine_perm!, collect_empty_structarray, grow_to_structarray!, collect_to_structarray!
 
 #-----------------------------------------------------------------------# exports
 export 
diff --git a/src/collect.jl b/src/collect.jl
index e856d9dc..276ed515 100644
--- a/src/collect.jl
+++ b/src/collect.jl
@@ -1,4 +1,4 @@
-_is_subtype(::Type{S}, ::Type{T}) where {S, T} = promote_type(S, T) == T
+const default_initializer = ArrayInitializer(t -> t<:Union{Tuple, NamedTuple, Pair})
 
 """
     collect_columns(itr)
@@ -14,50 +14,11 @@ Collect an iterable as a `Columns` object if it iterates `Tuples` or `NamedTuple
     s2 = Iterators.filter(isodd, 1:8)
     collect_columns(s2)
 """
-collect_columns(itr) = collect_columns(itr, Base.IteratorSize(itr))
+collect_columns(itr) = collect_structarray(itr, initializer = default_initializer)
+collect_empty_columns(itr) = collect_empty_structarray(itr, initializer = default_initializer)
 
-function collect_empty_columns(itr::T) where {T}
-    S = Core.Compiler.return_type(first, Tuple{T})
-    similar(arrayof(S), 0)
-end
-
-function collect_columns(@nospecialize(itr), ::Union{Base.HasShape, Base.HasLength})
-    st = iterate(itr)
-    st === nothing && return collect_empty_columns(itr)
-    el, i = st
-    dest = similar(arrayof(typeof(el)), length(itr))
-    dest[1] = el
-    collect_to_columns!(dest, itr, 2, i)
-end
-
-function collect_to_columns!(dest::AbstractArray{T}, itr, offs, st) where {T}
-    # collect to dest array, checking the type of each result. if a result does not
-    # match, widen the result type and re-dispatch.
-    i = offs
-    while true
-        elem = iterate(itr, st)
-        elem === nothing && break
-        el, st = elem
-        if fieldwise_isa(el, T)
-            @inbounds dest[i] = el
-            i += 1
-        else
-            new = widencolumns(dest, i, el, T)
-            @inbounds new[i] = el
-            return collect_to_columns!(new, itr, i+1, st)
-        end
-    end
-    return dest
-end
-
-function collect_columns(itr, ::Base.SizeUnknown)
-    elem = iterate(itr)
-    elem === nothing && return collect_empty_columns(itr)
-    el, st = elem
-    dest = similar(arrayof(typeof(el)), 1)
-    dest[1] = el
-    grow_to_columns!(dest, itr, iterate(itr, st))
-end
+grow_to_columns!(args...) = grow_to_structarray!(args...)
+collect_to_columns!(args...) = collect_to_structarray!(args...)
 
 function collect_columns_flattened(itr)
     elem = iterate(itr)
@@ -110,75 +71,3 @@ function collect_columns_flattened!(dest::Columns{<:Pair}, itr, el::Pair, st)
     return Columns(dest_key => dest_data)
 end
 
-function grow_to_columns!(dest::AbstractArray{T}, itr, elem = iterate(itr)) where {T}
-    # collect to dest array, checking the type of each result. if a result does not
-    # match, widen the result type and re-dispatch.
-    i = length(dest)+1
-    while elem !== nothing
-        el, st = elem
-        if fieldwise_isa(el, T)
-            push!(dest, el)
-            elem = iterate(itr, st)
-            i += 1
-        else
-            new = widencolumns(dest, i, el, T)
-            push!(new, el)
-            return grow_to_columns!(new, itr, iterate(itr, st))
-        end
-    end
-    return dest
-end
-
-# extra methods if we have widened to Vector{Tuple} or Vector{NamedTuple}
-# better to not generate as this is the case where the user is sending heterogenoeus data
-fieldwise_isa(el::S, ::Type{Tuple}) where {S<:Tup} = _is_subtype(S, Tuple)
-fieldwise_isa(el::S, ::Type{NamedTuple}) where {S<:Tup} = _is_subtype(S, NamedTuple)
-
-@generated function fieldwise_isa(el::S, ::Type{T}) where {S<:Tup, T<:Tup}
-    if (fieldnames(S) == fieldnames(T)) && all(_is_subtype(s, t) for (s, t) in zip(fieldtypes(S), fieldtypes(T)))
-        return :(true)
-    else
-        return :(false)
-    end
-end
-
-@generated function fieldwise_isa(el::S, ::Type{T}) where {S, T}
-    if _is_subtype(S, T)
-        return :(true)
-    else
-        return :(false)
-    end
-end
-
-fieldwise_isa(el::Pair, ::Type{Pair{T1, T2}}) where {T1, T2}  =
-    fieldwise_isa(el.first, T1) && fieldwise_isa(el.second, T2)
-
-function widencolumns(dest, i, el::S, ::Type{T}) where{S <: Tup, T<:Tup}
-    if fieldnames(S) != fieldnames(T) || T == Tuple || T == NamedTuple
-        R = (S <: Tuple) && (T <: Tuple) ? Tuple :  (S <: NamedTuple) && (T <: NamedTuple) ? NamedTuple : Any
-        new = Array{R}(undef, length(dest))
-        copyto!(new, 1, dest, 1, i-1)
-    else
-        sp, tp = fieldtypes(S), fieldtypes(T)
-        idx = findall(collect(!(s <: t) for (s, t) in zip(sp, tp)))
-        new = dest
-        for l in idx
-            newcol = Vector{promote_type(sp[l], tp[l])}(undef, length(dest))
-            copyto!(newcol, 1, column(dest, l), 1, i-1)
-            new = setcol(new, l, newcol)
-        end
-    end
-    new
-end
-
-function widencolumns(dest, i, el::S, ::Type{T}) where{S, T}
-    new = Vector{promote_type(S, T)}(undef, length(dest))
-    copyto!(new, 1, dest, 1, i-1)
-    new
-end
-
-function widencolumns(dest::Columns{<:Pair}, i, el::Pair, ::Type{Pair{T1, T2}}) where{T1, T2}
-    dest1 = fieldwise_isa(el.first, T1) ? columns(dest).first : widencolumns(columns(dest).first, i, el.first, T1)
-    dest2 = fieldwise_isa(el.second, T2) ? columns(dest).second : widencolumns(columns(dest).second, i, el.second, T2)
-    Columns(dest1 => dest2)
-end
diff --git a/test/test_collect.jl b/test/test_collect.jl
index c6a52b8b..a4200669 100644
--- a/test/test_collect.jl
+++ b/test/test_collect.jl
@@ -10,16 +10,16 @@
     @inferred IndexedTables.collect_to_columns!(dest, itr, 2, st)
 
     v = [(a = 1, b = 2), (a = 1.2, b = 3)]
-    @test collect_columns(v) == Columns((a = [1, 1.2], b = Int[2, 3]))
-    @test typeof(collect_columns(v)) == typeof(Columns((a = [1, 1.2], b = Int[2, 3])))
+    @test collect_columns(v) == Columns((a = Real[1, 1.2], b = Int[2, 3]))
+    @test typeof(collect_columns(v)) == typeof(Columns((a = Real[1, 1.2], b = Int[2, 3])))
 
     v = [(a = 1, b = 2), (a = 1.2, b = "3")]
-    @test collect_columns(v) == Columns((a = [1, 1.2], b = Any[2, "3"]))
-    @test typeof(collect_columns(v)) == typeof(Columns((a = [1, 1.2], b = Any[2, "3"])))
+    @test collect_columns(v) == Columns((a = Real[1, 1.2], b = Any[2, "3"]))
+    @test typeof(collect_columns(v)) == typeof(Columns((a = Real[1, 1.2], b = Any[2, "3"])))
 
     v = [(a = 1, b = 2), (a = 1.2, b = 2), (a = 1, b = "3")]
-    @test collect_columns(v) == Columns((a = [1, 1.2, 1], b = Any[2, 2, "3"]))
-    @test typeof(collect_columns(v)) == typeof(Columns((a = [1, 1.2, 1], b = Any[2, 2, "3"])))
+    @test collect_columns(v) == Columns((a = Real[1, 1.2, 1], b = Any[2, 2, "3"]))
+    @test typeof(collect_columns(v)) == typeof(Columns((a = Real[1, 1.2, 1], b = Any[2, 2, "3"])))
 
     # length unknown
     itr = Iterators.filter(isodd, 1:8)
@@ -44,21 +44,21 @@ end
     @inferred collect_columns(v)
 
     v = [(1, 2), (1.2, 3)]
-    @test collect_columns(v) == Columns(([1, 1.2], Int[2, 3]))
+    @test collect_columns(v) == Columns((Real[1, 1.2], Int[2, 3]))
 
     v = [(1, 2), (1.2, "3")]
-    @test collect_columns(v) == Columns(([1, 1.2], Any[2, "3"]))
-    @test typeof(collect_columns(v)) == typeof(Columns(([1, 1.2], Any[2, "3"])))
+    @test collect_columns(v) == Columns((Real[1, 1.2], Any[2, "3"]))
+    @test typeof(collect_columns(v)) == typeof(Columns((Real[1, 1.2], Any[2, "3"])))
 
     v = [(1, 2), (1.2, 2), (1, "3")]
-    @test collect_columns(v) == Columns(([1, 1.2, 1], Any[2, 2, "3"]))
+    @test collect_columns(v) == Columns((Real[1, 1.2, 1], Any[2, 2, "3"]))
     # length unknown
     itr = Iterators.filter(isodd, 1:8)
     tuple_itr = ((i+1, i-1) for i in itr)
     @test collect_columns(tuple_itr) == Columns(([2, 4, 6, 8], [0, 2, 4, 6]))
     tuple_itr_real = (i == 1 ? (1.2, i-1) : (i+1, i-1) for i in itr)
-    @test collect_columns(tuple_itr_real) == Columns(([1.2, 4, 6, 8], [0, 2, 4, 6]))
-    @test typeof(collect_columns(tuple_itr_real)) == typeof(Columns(([1.2, 4, 6, 8], [0, 2, 4, 6])))
+    @test collect_columns(tuple_itr_real) == Columns((Real[1.2, 4, 6, 8], [0, 2, 4, 6]))
+    @test typeof(collect_columns(tuple_itr_real)) == typeof(Columns((Real[1.2, 4, 6, 8], [0, 2, 4, 6])))
 
     # empty
     itr = Iterators.filter(t -> t > 10, 1:8)
@@ -82,7 +82,7 @@ end
     @test collect_columns(itr) == collect(itr)
     real_itr = (i == 1 ? 1.5 : i for i in itr)
     @test collect_columns(real_itr) == collect(real_itr)
-    @test eltype(collect_columns(real_itr)) == Float64
+    @test eltype(collect_columns(real_itr)) == Real
 
     #empty
     itr = Iterators.filter(t -> t > 10, 1:8)
@@ -104,8 +104,8 @@ end
     @test eltype(collect_columns(v)) == Pair{Int, Int}
 
     v = (i == 1 ? (1.2 => i+1) : (i => i+1) for i in 1:3)
-    @test collect_columns(v) == Columns([1.2,2,3]=>[2,3,4])
-    @test eltype(collect_columns(v)) == Pair{Float64, Int}
+    @test collect_columns(v) == Columns(Real[1.2,2,3]=>[2,3,4])
+    @test eltype(collect_columns(v)) == Pair{Real, Int}
 
     v = ((a=i,) => (b="a$i",) for i in 1:3)
     @test collect_columns(v) == Columns(Columns((a = [1,2,3],))=>Columns((b = ["a1","a2","a3"],)))
@@ -128,21 +128,12 @@ end
     @test isequal(t, table((b = [1,1,1], a = [2, missing, 3]), pkey = :b))
 end
 
-@testset "issubtype" begin
-    @test IndexedTables._is_subtype(Int, Int)
-    @test IndexedTables._is_subtype(Int, Union{Missing, Int})
-    @test !IndexedTables._is_subtype(Union{Missing, Int}, Int)
-    @test IndexedTables._is_subtype(Union{Missing, Int}, Union{Missing, Int})
-    @test !IndexedTables._is_subtype(Union{Missing, Int}, Union{Missing,String})
-    @test !IndexedTables._is_subtype(Int, String)
-end
-
 @testset "collectflattened" begin
     t = [(:a => [1, 2]), (:b => [1, 3])]
     @test collect_columns_flattened(t) == Columns([:a, :a, :b, :b] => [1, 2, 1, 3])
     t = ([(a = 1,), (a = 2,)], [(a = 1.1,), (a = 2.2,)])
-    @test collect_columns_flattened(t) == Columns(a = [1, 2, 1.1, 2.2])
-    @test eltype(collect_columns_flattened(t)) == typeof((a=1.1,))
+    @test collect_columns_flattened(t) == Columns(a = Real[1, 2, 1.1, 2.2])
+    @test eltype(collect_columns_flattened(t)) == NamedTuple{(:a,), Tuple{Real}}
     t = [(:a => table(1:2, ["a", "b"])), (:b => table(3:4, ["c", "d"]))]
     @test table(collect_columns_flattened(t)) == table([:a, :a, :b, :b], 1:4, ["a", "b", "c", "d"], pkey = 1)
 end
diff --git a/test/test_core.jl b/test/test_core.jl
index 68ea1106..28889f4c 100644
--- a/test/test_core.jl
+++ b/test/test_core.jl
@@ -618,7 +618,7 @@ end
 
     t3 = map(x->ntuple(identity, x.x), t)
     @test isa(t3.data, Vector)
-    @test eltype(t3.data) == Tuple
+    @test eltype(t3.data) == Tuple{Int64,Int64,Int64,Int64,Vararg{Int64,N} where N}
 
     y = [1, 1//2, "x"]
     function foo(x)

From dd7862ec8a18185ac4126bfe5c7d4150d046b7d1 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Tue, 18 Dec 2018 16:34:22 +0000
Subject: [PATCH 19/21] Remove type piracy

---
 src/columns.jl | 2 --
 src/tables.jl  | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/src/columns.jl b/src/columns.jl
index cef4071c..c00abbfc 100644
--- a/src/columns.jl
+++ b/src/columns.jl
@@ -1,7 +1,5 @@
 # to get rid of eventually
 const Columns = StructVector
-# There is a StackOverflow bug in this case in Base.unaliascopy
-Base.copy(c::Columns{<:Union{NamedTuple{(),Tuple{}}, Tuple{}}}) = c
 
 # IndexedTable-like API
 
diff --git a/src/tables.jl b/src/tables.jl
index 816aa767..e1684bba 100644
--- a/src/tables.jl
+++ b/src/tables.jl
@@ -1,10 +1,7 @@
 #-----------------------------------------------------------------------# Columns 
 const TableColumns = Columns{T} where {T<:NamedTuple}
 
-# Columns(x; kw...) = Columns(Tables.columntable(x); kw...)
-
 Tables.istable(::Type{<:TableColumns}) = true
-Tables.materializer(c::TableColumns) = Columns
 
 Tables.rowaccess(c::TableColumns) = true
 Tables.rows(c::TableColumns) = c

From 0458261483f8b0fc29d89e987dc7168137b88860 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Tue, 18 Dec 2018 17:06:29 +0000
Subject: [PATCH 20/21] clean up tables integration

---
 src/columns.jl |  2 +-
 src/tables.jl  | 15 +--------------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/src/columns.jl b/src/columns.jl
index c00abbfc..74226fba 100644
--- a/src/columns.jl
+++ b/src/columns.jl
@@ -51,8 +51,8 @@ available selection options and syntax.
 """
 function columns end
 
+columns(c::Columns) = fieldarrays(c)
 columns(c::Columns{<:Tuple}) = Tuple(fieldarrays(c))
-columns(c::Columns{<:NamedTuple}) = fieldarrays(c)
 columns(c::Columns{<:Pair}) = c.first => c.second
 
 """
diff --git a/src/tables.jl b/src/tables.jl
index e1684bba..7bc93967 100644
--- a/src/tables.jl
+++ b/src/tables.jl
@@ -1,18 +1,5 @@
-#-----------------------------------------------------------------------# Columns 
-const TableColumns = Columns{T} where {T<:NamedTuple}
-
-Tables.istable(::Type{<:TableColumns}) = true
-
-Tables.rowaccess(c::TableColumns) = true
-Tables.rows(c::TableColumns) = c
-Tables.schema(c::TableColumns) = Tables.Schema(colnames(c), Tuple(map(eltype, c.columns)))
-
-Tables.columnaccess(c::TableColumns) = true
-Tables.columns(c::TableColumns) = c.columns
-# Tables.schema already defined for NamedTuple of Vectors (c.columns)
-
 #-----------------------------------------------------------------------# IndexedTable
-Tables.istable(::Type{IndexedTable{C}}) where {C<:TableColumns} = true
+Tables.istable(::Type{IndexedTable{C}}) where {C<:Columns} = Tables.istable(C)
 Tables.materializer(t::IndexedTable) = table
 for f in [:rowaccess, :rows, :columnaccess, :columns, :schema]
     @eval Tables.$f(t::IndexedTable) = Tables.$f(Columns(columns(t)))

From 3a0eeeb9693eaaa58ebc8165711a7248402ad6a0 Mon Sep 17 00:00:00 2001
From: Pietro Vertechi <pietro.vertechi@neuro.fchampalimaud.org>
Date: Tue, 18 Dec 2018 19:28:06 +0000
Subject: [PATCH 21/21] lower bound structarray

---
 REQUIRE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/REQUIRE b/REQUIRE
index c61eff94..4780ae6e 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -6,4 +6,4 @@ TableTraits 0.3.0
 TableTraitsUtils 0.2.0
 IteratorInterfaceExtensions 0.1.0
 Tables
-StructArrays
+StructArrays 0.2.0