diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4222741a0..5bbb7f077 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 env: - JULIA_NUM_THREADS: 4 + JULIA_NUM_THREADS: 4,1 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v1 with: diff --git a/NEWS.md b/NEWS.md index bc0606d74..14b1f1fd1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,10 +2,24 @@ ## New functionalities +* Allow passing multiple values to add in `push!`, `pushfirst!`, + `append!`, and `prepend!` + ([#3372](https://github.com/JuliaData/DataFrames.jl/pull/3372)) * `rename` and `rename!` now allow to apply a function transforming column names only to a subset of the columns specified by the `cols` keyword argument ([#3380](https://github.com/JuliaData/DataFrames.jl/pull/3380)) +* `mapcols` and `mapcols!` now allow to apply a function transforming + columns only to a subset of the columns specified by the `cols` + keyword argument + ([#3386](https://github.com/JuliaData/DataFrames.jl/pull/3386)) + +## Bug fixes + +* Always use the default thread pool for multithreaded operations, + instead of using the interactive thread pool when Julia was started + with `-tM,N` with N > 0 + ([#3385](https://github.com/JuliaData/DataFrames.jl/pull/3385)) # DataFrames.jl v1.6.1 Release Notes diff --git a/Project.toml b/Project.toml index 09dd6c284..df31d9fb6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "DataFrames" uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "1.6.1" +version = "1.7.0" [deps] Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index 625cf6066..1cb8dfd66 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -7,7 +7,8 @@ CurrentModule = DataFrames ## Multithreading support By default, selected operations in DataFrames.jl 
automatically use multiple threads -when available. It is task-based and implemented using the `@spawn` macro from Julia Base. +when available. Multithreading is task-based and implemented using the `@spawn` +macro from Julia Base. Tasks are therefore scheduled on the `:default` threadpool. Functions that take user-defined functions and may run it in parallel accept a `threads` keyword argument which allows disabling multithreading when the provided function requires serial execution or is not thread-safe. diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 600601506..a812365ee 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -254,7 +254,7 @@ function rename!(df::AbstractDataFrame, return df end - # needed because of dispach ambiguity + # needed because of dispatch ambiguity function rename!(df::AbstractDataFrame) _drop_all_nonnote_metadata!(parent(df)) return df diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 3fdc0523d..c81228fb1 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -107,20 +107,20 @@ as a `DataFrameRows` over a view of rows of parent of `dfr`. julia> collect(Iterators.partition(eachrow(DataFrame(x=1:5)), 2)) 3-element Vector{DataFrames.DataFrameRows{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}}: 2×1 DataFrameRows - Row │ x - │ Int64 + Row │ x + │ Int64 ─────┼─────── 1 │ 1 2 │ 2 2×1 DataFrameRows - Row │ x - │ Int64 + Row │ x + │ Int64 ─────┼─────── 1 │ 3 2 │ 4 1×1 DataFrameRows - Row │ x - │ Int64 + Row │ x + │ Int64 ─────┼─────── 1 │ 5 ``` @@ -408,12 +408,17 @@ Base.show(dfcs::DataFrameColumns; summary=summary, eltypes=eltypes, truncate=truncate, kwargs...) 
""" - mapcols(f::Union{Function, Type}, df::AbstractDataFrame) + mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All()) + +Return a `DataFrame` where each column of `df` selected by `cols` (by default, all columns) +is transformed using function `f`. +Columns not selected by `cols` are copied. -Return a `DataFrame` where each column of `df` is transformed using function `f`. `f` must return `AbstractVector` objects all with the same length or scalars (all values other than `AbstractVector` are considered to be a scalar). +The `cols` column selector can be any value accepted as column selector by the `names` function. + Note that `mapcols` guarantees not to reuse the columns from `df` in the returned `DataFrame`. If `f` returns its argument then it gets copied before being stored. @@ -440,15 +445,32 @@ julia> mapcols(x -> x.^2, df) 2 │ 4 144 3 │ 9 169 4 │ 16 196 + +julia> mapcols(x -> x.^2, df, cols=r"y") +4×2 DataFrame + Row │ x y + │ Int64 Int64 +─────┼────────────── + 1 │ 1 121 + 2 │ 2 144 + 3 │ 3 169 + 4 │ 4 196 ``` """ -function mapcols(f::Union{Function, Type}, df::AbstractDataFrame) +function mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All()) + if cols === All() || cols === Colon() + apply = Iterators.repeated(true) + else + picked = Set(names(df, cols)) + apply = Bool[name in picked for name in names(df)] + end + # note: `f` must return a consistent length vs = AbstractVector[] seenscalar = false seenvector = false - for v in eachcol(df) - fv = f(v) + for (v, doapply) in zip(eachcol(df), apply) + fv = doapply ? 
f(v) : copy(v) if fv isa AbstractVector if seenscalar throw(ArgumentError("mixing scalars and vectors in mapcols not allowed")) @@ -470,9 +492,12 @@ function mapcols(f::Union{Function, Type}, df::AbstractDataFrame) end """ - mapcols!(f::Union{Function, Type}, df::DataFrame) + mapcols!(f::Union{Function, Type}, df::DataFrame; cols=All()) + +Update a `DataFrame` in-place where each column of `df` selected by `cols` (by default, all columns) +is transformed using function `f`. +Columns not selected by `cols` are left unchanged. -Update a `DataFrame` in-place where each column of `df` is transformed using function `f`. `f` must return `AbstractVector` objects all with the same length or scalars (all values other than `AbstractVector` are considered to be a scalar). @@ -503,20 +528,39 @@ julia> df 2 │ 4 144 3 │ 9 169 4 │ 16 196 + +julia> mapcols!(x -> 2 * x, df, cols=r"x"); + +julia> df +4×2 DataFrame + Row │ x y + │ Int64 Int64 +─────┼────────────── + 1 │ 2 121 + 2 │ 8 144 + 3 │ 18 169 + 4 │ 32 196 ``` """ -function mapcols!(f::Union{Function, Type}, df::DataFrame) - # note: `f` must return a consistent length +function mapcols!(f::Union{Function,Type}, df::DataFrame; cols=All()) if ncol(df) == 0 # skip if no columns _drop_all_nonnote_metadata!(df) return df end + if cols === All() || cols === Colon() + apply = Iterators.repeated(true) + else + picked = Set(names(df, cols)) + apply = Bool[name in picked for name in names(df)] + end + + # note: `f` must return a consistent length vs = AbstractVector[] seenscalar = false seenvector = false - for v in eachcol(df) - fv = f(v) + for (v, doapply) in zip(eachcol(df), apply) + fv = doapply ? 
f(v) : v if fv isa AbstractVector if seenscalar throw(ArgumentError("mixing scalars and vectors in mapcols not allowed")) diff --git a/src/dataframe/insertion.jl b/src/dataframe/insertion.jl index ec148065c..1655dcb19 100644 --- a/src/dataframe/insertion.jl +++ b/src/dataframe/insertion.jl @@ -1,12 +1,10 @@ """ - append!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - append!(df::DataFrame, table; cols::Symbol=:setequal, + append!(df::DataFrame, tables...; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) -Add the rows of `df2` to the end of `df`. If the second argument `table` is not -an `AbstractDataFrame` then it is converted using `DataFrame(table, -copycols=false)` before being appended. +Add the rows of tables passed as `tables` to the end of `df`. If the table is not +an `AbstractDataFrame` then it is converted using +`DataFrame(table, copycols=false)` before being appended. The exact behavior of `append!` depends on the `cols` argument: * If `cols == :setequal` (this is the default) then `df2` must contain exactly @@ -78,18 +76,53 @@ julia> df1 4 │ 4 4 5 │ 5 5 6 │ 6 6 + +julia> append!(df2, DataFrame(A=1), (; C=1:2), cols=:union) +6×3 DataFrame + Row │ A B C + │ Float64? Int64? Int64? 
+─────┼───────────────────────────── + 1 │ 4.0 4 missing + 2 │ 5.0 5 missing + 3 │ 6.0 6 missing + 4 │ 1.0 missing missing + 5 │ missing missing 1 + 6 │ missing missing 2 ``` """ Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) = _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=true) +function Base.append!(df::DataFrame, table; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + if table isa Dict && cols == :orderequal + throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " * + "`:orderequal` is not allowed as it is unordered")) + end + append!(df, DataFrame(table, copycols=false), cols=cols, promote=promote) +end + +function Base.append!(df::DataFrame, @nospecialize tables...; + cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + if !(cols in (:orderequal, :setequal, :intersect, :subset, :union)) + throw(ArgumentError("`cols` keyword argument must be " * + ":orderequal, :setequal, :intersect, :subset or :union)")) + end + + return foldl((df, table) -> append!(df, table, cols=cols, promote=promote), + collect(Any, tables), init=df) +end + """ - prepend!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - prepend!(df::DataFrame, table; cols::Symbol=:setequal, + prepend!(df::DataFrame, tables...; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) +Add the rows of tables passed as `tables` to the beginning of `df`. If the table is not +an `AbstractDataFrame` then it is converted using +`DataFrame(table, copycols=false)` before being prepended. + Add the rows of `df2` to the beginning of `df`. If the second argument `table` is not an `AbstractDataFrame` then it is converted using `DataFrame(table, copycols=false)` before being prepended. 
@@ -164,12 +197,45 @@ julia> df1 4 │ 1 1 5 │ 2 2 6 │ 3 3 + +julia> prepend!(df2, DataFrame(A=1), (; C=1:2), cols=:union) +6×3 DataFrame + Row │ A B C + │ Float64? Int64? Int64? +─────┼───────────────────────────── + 1 │ 1.0 missing missing + 2 │ missing missing 1 + 3 │ missing missing 2 + 4 │ 4.0 4 missing + 5 │ 5.0 5 missing + 6 │ 6.0 6 missing ``` """ Base.prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) = _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=false) +function Base.prepend!(df::DataFrame, table; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + if table isa Dict && cols == :orderequal + throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " * + "`:orderequal` is not allowed as it is unordered")) + end + prepend!(df, DataFrame(table, copycols=false), cols=cols, promote=promote) +end + +function Base.prepend!(df::DataFrame, @nospecialize tables...; + cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + if !(cols in (:orderequal, :setequal, :intersect, :subset, :union)) + throw(ArgumentError("`cols` keyword argument must be " * + ":orderequal, :setequal, :intersect, :subset or :union)")) + end + + return foldr((table, df) -> prepend!(df, table, cols=cols, promote=promote), + collect(Any, tables), init=df) +end + function _append_or_prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol, promote::Bool, atend::Bool) if !(cols in (:orderequal, :setequal, :intersect, :subset, :union)) @@ -355,6 +421,10 @@ following way: added to `df` (using `missing` for existing rows) and a `missing` value is pushed to columns missing in `row` that are present in `df`. +If `row` is not a `DataFrameRow`, `NamedTuple`, `AbstractDict`, or `Tables.AbstractRow` +the `cols` keyword argument must be `:setequal` (the default), +because such rows do not provide column name information. 
+ If `promote=true` and element type of a column present in `df` does not allow the type of a pushed argument then a new column with a promoted element type allowing it is freshly allocated and stored in `df`. If `promote=false` an error @@ -371,12 +441,14 @@ $METADATA_FIXED """ """ - push!(df::DataFrame, row::Union{Tuple, AbstractArray}; promote::Bool=false) + push!(df::DataFrame, row::Union{Tuple, AbstractArray}...; + cols::Symbol=:setequal, promote::Bool=false) push!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict, - Tables.AbstractRow}; + Tables.AbstractRow}...; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) Add one row at the end of `df` in-place, taking the values from `row`. +Several rows can be added by passing them as separate arguments. $INSERTION_COMMON @@ -452,18 +524,36 @@ julia> push!(df, NamedTuple(), cols=:subset) 6 │ 11 12 missing 7 │ 1.0 missing 1.0 8 │ missing missing missing + +julia> push!(DataFrame(a=1, b=2), (3, 4), (5, 6)) +3×2 DataFrame + Row │ a b + │ Int64 Int64 +─────┼────────────── + 1 │ 1 2 + 2 │ 3 4 + 3 │ 5 6 ``` """ -Base.push!(df::DataFrame, row::Any; promote::Bool=false) = - _row_inserter!(df, -1, row, Val{:push}(), promote) +function Base.push!(df::DataFrame, row::Any; + cols=:setequal, promote::Bool=false) + if cols !== :setequal + throw(ArgumentError("`cols` can only be `:setequal` when `row` is a `$(typeof(row))` " * + "as this type does not provide column names")) + end + + return _row_inserter!(df, -1, row, Val{:push}(), promote) +end """ - pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}; promote::Bool=false) + pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}...; + cols::Symbol=:setequal, promote::Bool=false) pushfirst!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict, - Tables.AbstractRow}; + Tables.AbstractRow}...; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) Add one row at the beginning of `df` in-place, taking the values from 
`row`. +Several rows can be added by passing them as separate arguments. $INSERTION_COMMON @@ -539,13 +629,30 @@ julia> pushfirst!(df, NamedTuple(), cols=:subset) 6 │ a 1 missing 7 │ b 2 missing 8 │ c 3 missing + +julia> pushfirst!(DataFrame(a=1, b=2), (3, 4), (5, 6)) +3×2 DataFrame + Row │ a b + │ Int64 Int64 +─────┼────────────── + 1 │ 3 4 + 2 │ 5 6 + 3 │ 1 2 ``` """ -Base.pushfirst!(df::DataFrame, row::Any; promote::Bool=false) = - _row_inserter!(df, -1, row, Val{:pushfirst}(), promote) +function Base.pushfirst!(df::DataFrame, row::Any; + cols=:setequal, promote::Bool=false) + if cols !== :setequal + throw(ArgumentError("`cols` can only be `:setequal` when `row` is a `$(typeof(row))` " * + "as this type does not provide column names")) + end + + return _row_inserter!(df, -1, row, Val{:pushfirst}(), promote) +end """ - insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray}; promote::Bool=false) + insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray}; + cols::Symbol=:setequal, promote::Bool=false) insert!(df::DataFrame, index::Integer, row::Union{DataFrameRow, NamedTuple, AbstractDict, Tables.AbstractRow}; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) @@ -629,7 +736,13 @@ julia> insert!(df, 3, NamedTuple(), cols=:subset) 8 │ 1.0 missing 1.0 ``` """ -function Base.insert!(df::DataFrame, index::Integer, row::Any; promote::Bool=false) +function Base.insert!(df::DataFrame, index::Integer, row::Any; + cols=:setequal, promote::Bool=false) + if cols !== :setequal + throw(ArgumentError("`cols` can only be `:setequal` when `row` is a `$(typeof(row))` " * + "as this type does not provide column names")) + end + index isa Bool && throw(ArgumentError("invalid index: $index of type Bool")) 1 <= index <= nrow(df)+1 || throw(ArgumentError("invalid index: $index for data frame with $(nrow(df)) rows")) @@ -986,3 +1099,37 @@ function _row_inserter!(df::DataFrame, loc::Integer, _drop_all_nonnote_metadata!(df) return df end + 
+function Base.push!(df::DataFrame, @nospecialize rows...; + cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + if !(cols in (:orderequal, :setequal, :intersect, :subset, :union)) + throw(ArgumentError("`cols` keyword argument must be " * + ":orderequal, :setequal, :intersect, :subset or :union)")) + end + with_names_count = count(rows) do row + row isa Union{DataFrameRow,AbstractDict,NamedTuple,Tables.AbstractRow} + end + if 0 < with_names_count < length(rows) + throw(ArgumentError("Mixing rows with column names and without column names " * + "in a single `push!` call is not allowed")) + end + return foldl((df, row) -> push!(df, row, cols=cols, promote=promote), rows, init=df) +end + +function Base.pushfirst!(df::DataFrame, @nospecialize rows...; + cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + if !(cols in (:orderequal, :setequal, :intersect, :subset, :union)) + throw(ArgumentError("`cols` keyword argument must be " * + ":orderequal, :setequal, :intersect, :subset or :union)")) + end + with_names_count = count(rows) do row + row isa Union{DataFrameRow,AbstractDict,NamedTuple,Tables.AbstractRow} + end + if 0 < with_names_count < length(rows) + throw(ArgumentError("Mixing rows with column names and without column names " * + "in a single `pushfirst!` call is not allowed")) + end + return foldr((row, df) -> pushfirst!(df, row, cols=cols, promote=promote), rows, init=df) +end diff --git a/src/other/tables.jl b/src/other/tables.jl index 4213c9888..7b3335575 100644 --- a/src/other/tables.jl +++ b/src/other/tables.jl @@ -63,31 +63,13 @@ function DataFrame(x; copycols::Union{Nothing, Bool}=nothing) end # the logic here relies on the fact that Tables.CopiedColumns -# is the only exception for default copycols value +# is the only exception for default copycols value DataFrame(x, cnames::AbstractVector; makeunique::Bool=false, copycols::Union{Nothing, Bool}=nothing) = rename!(DataFrame(x, copycols=something(copycols, !(x isa 
Tables.CopiedColumns))), _name2symbol(cnames), makeunique=makeunique) -function Base.append!(df::DataFrame, table; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - if table isa Dict && cols == :orderequal - throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " * - "`:orderequal` is not allowed as it is unordered")) - end - append!(df, DataFrame(table, copycols=false), cols=cols, promote=promote) -end - -function Base.prepend!(df::DataFrame, table; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - if table isa Dict && cols == :orderequal - throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " * - "`:orderequal` is not allowed as it is unordered")) - end - prepend!(df, DataFrame(table, copycols=false), cols=cols, promote=promote) -end - # This supports the Tables.RowTable type; needed to avoid ambiguities w/ another constructor DataFrame(x::AbstractVector{NamedTuple{names, T}}; copycols::Bool=true) where {names, T} = fromcolumns(Tables.columns(Tables.IteratorWrapper(x)), collect(names), copycols=false) diff --git a/src/other/utils.jl b/src/other/utils.jl index 455c406f4..78f91785b 100644 --- a/src/other/utils.jl +++ b/src/other/utils.jl @@ -221,16 +221,23 @@ end Equivalent to `Threads.@spawn` if `threads === true`, otherwise run `expr` and return a `Task` that returns its value. """ -macro spawn_or_run_task(threads, expr) - letargs = Base._lift_one_interp!(expr) +macro spawn_or_run_task(threads, ex) + letargs = Base._lift_one_interp!(ex) - thunk = esc(:(()->($expr))) + thunk = :(()->($(esc(ex)))) + @static if VERSION >= v"1.10.0-DEV" + Base.replace_linenums!(thunk, __source__) + end var = esc(Base.sync_varname) + spawn_set_thrpool = VERSION >= v"1.9.0" ? + :(Base.Threads._spawn_set_thrpool(task, :default)) : + :() quote let $(letargs...) 
if $(esc(threads)) local task = Task($thunk) task.sticky = false + $(spawn_set_thrpool) else # Run expr immediately res = $thunk() @@ -253,16 +260,23 @@ end Equivalent to `Threads.@spawn` if `threads === true`, otherwise run `expr`. """ -macro spawn_or_run(threads, expr) - letargs = Base._lift_one_interp!(expr) +macro spawn_or_run(threads, ex) + letargs = Base._lift_one_interp!(ex) - thunk = esc(:(()->($expr))) + thunk = :(()->($(esc(ex)))) + if VERSION >= v"1.10.0-DEV" + Base.replace_linenums!(thunk, __source__) + end var = esc(Base.sync_varname) + spawn_set_thrpool = VERSION >= v"1.9.0" ? + :(Base.Threads._spawn_set_thrpool(task, :default)) : + :() quote let $(letargs...) if $(esc(threads)) local task = Task($thunk) task.sticky = false + $(spawn_set_thrpool) if $(Expr(:islocal, var)) put!($var, task) end diff --git a/test/insertion.jl b/test/insertion.jl index 19bc175e5..5974820ba 100644 --- a/test/insertion.jl +++ b/test/insertion.jl @@ -1370,4 +1370,112 @@ end end end +@testset "multi element append!/prepend!/push!/pushfirst!" 
begin + df = DataFrame(a=1, b=2) + @test append!(df) == DataFrame(a=1, b=2) + @test prepend!(df) == DataFrame(a=1, b=2) + @test push!(df) == DataFrame(a=1, b=2) + @test pushfirst!(df) == DataFrame(a=1, b=2) + @test_throws ArgumentError append!(df, cols=:x) == DataFrame(a=1, b=2) + @test_throws ArgumentError prepend!(df, cols=:x) == DataFrame(a=1, b=2) + @test_throws ArgumentError push!(df, cols=:x) == DataFrame(a=1, b=2) + @test_throws ArgumentError pushfirst!(df, cols=:x) == DataFrame(a=1, b=2) + + for x in (DataFrame(a=3, b=4), (a=[3], b=[4]), [(a=3, b=4)]), + y in (DataFrame(a=5, b=6), (a=[5], b=[6]), [(a=5, b=6)]), + z in (DataFrame(a=7, b=8), (a=[7], b=[8]), [(a=7, b=8)]) + @test append!(copy(df), x, y) == + DataFrame(a=1:2:5, b=2:2:6) + @test append!(copy(df), x, y, z) == + DataFrame(a=1:2:7, b=2:2:8) + @test prepend!(copy(df), x, y) == + DataFrame(a=[3, 5, 1], b=[4, 6, 2]) + @test prepend!(copy(df), x, y, z) == + DataFrame(a=[3, 5, 7, 1], b=[4, 6, 8, 2]) + end + + for x in (DataFrame(a=3, b=4)[1, :], (a=3, b=4)), + y in (DataFrame(a=5, b=6)[1, :], (a=5, b=6)), + z in (DataFrame(a=7, b=8)[1, :], (a=7, b=8)) + @test push!(copy(df), x, y) == + DataFrame(a=1:2:5, b=2:2:6) + @test push!(copy(df), x, y, z) == + DataFrame(a=1:2:7, b=2:2:8) + @test pushfirst!(copy(df), x, y) == + DataFrame(a=[3, 5, 1], b=[4, 6, 2]) + @test pushfirst!(copy(df), x, y, z) == + DataFrame(a=[3, 5, 7, 1], b=[4, 6, 8, 2]) + for cols in (:orderequal, :setequal, :union, :subset, :intersect) + @test push!(copy(df), x, y, cols=cols) == + DataFrame(a=1:2:5, b=2:2:6) + @test push!(copy(df), x, y, z, cols=cols) == + DataFrame(a=1:2:7, b=2:2:8) + @test pushfirst!(copy(df), x, y, cols=cols) == + DataFrame(a=[3, 5, 1], b=[4, 6, 2]) + @test pushfirst!(copy(df), x, y, z, cols=cols) == + DataFrame(a=[3, 5, 7, 1], b=[4, 6, 8, 2]) + end + end + + for x in ((3, 4), [3, 4]), y in ((5, 6), [5, 6]), z in ((7, 8), [7, 8]) + @test push!(copy(df), x, y) == + DataFrame(a=1:2:5, b=2:2:6) + @test push!(copy(df), 
x, y, z) == + DataFrame(a=1:2:7, b=2:2:8) + @test pushfirst!(copy(df), x, y) == + DataFrame(a=[3, 5, 1], b=[4, 6, 2]) + @test pushfirst!(copy(df), x, y, z) == + DataFrame(a=[3, 5, 7, 1], b=[4, 6, 8, 2]) + end + + for x in (DataFrame(a=3, b=4), (a=[3], b=[4]), [(a=3, b=4)]), + y in (DataFrame(a=5, c=6), (a=[5], c=[6]), [(a=5, c=6)]), + z in (DataFrame(a="7", d=8), (a=["7"], d=[8]), [(a="7", d=8)]) + @test append!(copy(df), x, y, cols=:union) ≅ + DataFrame(a=1:2:5, b=[2, 4, missing], c=[missing, missing, 6]) + @test append!(copy(df), x, y, z, cols=:union) ≅ + DataFrame(a=[1, 3, 5, "7"], b=[2, 4, missing, missing], + c=[missing, missing, 6, missing], + d=[missing, missing, missing, 8]) + @test prepend!(copy(df), x, y, cols=:union) ≅ + DataFrame(a=[3, 5, 1], b=[4, missing, 2], c=[missing, 6, missing]) + @test prepend!(copy(df), x, y, z, cols=:union) ≅ + DataFrame(a=[3, 5, "7", 1], b=[4, missing, missing, 2], + d=[missing, missing, 8, missing], + c=[missing, 6, missing, missing],) + end + + for x in (DataFrame(a=3, b=4)[1, :], (a=3, b=4)), + y in (DataFrame(a=5, c=6)[1, :], (a=5, c=6)), + z in (DataFrame(a="7", d=8)[1, :], (a="7", d=8)) + @test push!(copy(df), x, y, cols=:union) ≅ + DataFrame(a=1:2:5, b=[2, 4, missing], c=[missing, missing, 6]) + @test push!(copy(df), x, y, z, cols=:union) ≅ + DataFrame(a=[1, 3, 5, "7"], b=[2, 4, missing, missing], + c=[missing, missing, 6, missing], + d=[missing, missing, missing, 8]) + @test pushfirst!(copy(df), x, y, cols=:union) ≅ + DataFrame(a=[3, 5, 1], b=[4, missing, 2], c=[missing, 6, missing]) + @test pushfirst!(copy(df), x, y, z, cols=:union) ≅ + DataFrame(a=[3, 5, "7", 1], b=[4, missing, missing, 2], + d=[missing, missing, 8, missing], + c=[missing, 6, missing, missing],) + end + + @test_throws ArgumentError push!(df, (1, 2), cols=:union) + @test_throws ArgumentError pushfirst!(df, (1, 2), cols=:union) + + @test_throws ArgumentError push!(df, (1, 2), (1, 2), cols=:union) + @test_throws ArgumentError pushfirst!(df, (1, 2), (1, 
2), cols=:union) + + @test_throws ArgumentError push!(df, (a=1, b=2), (1, 2)) + @test_throws ArgumentError pushfirst!(df, (a=1, b=2), (1, 2)) + @test_throws ArgumentError push!(df, (1, 2), (a=1, b=2)) + @test_throws ArgumentError pushfirst!(df, (1, 2), (a=1, b=2)) + + @test insert!(DataFrame(a=1:3, b=11:13), 2, (0, 10), cols=:setequal) == + DataFrame(a=[1, 0, 2, 3], b=[11, 10, 12, 13]) + @test_throws ArgumentError insert!(df, 1, (1, 2), cols=:orderequal) +end + end # module diff --git a/test/iteration.jl b/test/iteration.jl index 2202b33d9..4c1b9d0d1 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -78,6 +78,19 @@ end df = mapcols(x -> 2:2, df) @test df == DataFrame(a=2) @test df.a isa Vector{Int} + + df = DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4]) + @test mapcols(x -> 2x, df, cols=r"a") == DataFrame(a1=[2, 4], a2=[4, 6], b=[3, 4]) + @test mapcols(x -> 2x, df, cols="b") == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8]) + @test mapcols(x -> 2x, df, cols=Not(r"a")) == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8]) + @test mapcols(x -> 2x, df, cols=Int) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8]) + @test mapcols(x -> 2x, df, cols=Not(All())) == DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4]) + @test mapcols(x -> 2x, df, cols=:) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8]) + + df2 = mapcols(x -> 2x, df, cols="b") + @test df2.a1 == df.a1 && df2.a1 !== df.a1 + @test df2.a2 == df.a2 && df2.a2 !== df.a2 + @test df2.b == 2*df.b end @testset "mapcols!" 
begin @@ -109,6 +122,18 @@ end mapcols!(x -> 2:2, df) @test df == DataFrame(a=2) @test df.a isa Vector{Int} + + df = DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4]) + @test mapcols!(x -> 2x, copy(df), cols=r"a") == DataFrame(a1=[2, 4], a2=[4, 6], b=[3, 4]) + @test mapcols!(x -> 2x, copy(df), cols="b") == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8]) + @test mapcols!(x -> 2x, copy(df), cols=Not(r"a")) == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8]) + @test mapcols!(x -> 2x, copy(df), cols=Int) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8]) + @test mapcols!(x -> 2x, copy(df), cols=Not(All())) == DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4]) + @test mapcols!(x -> 2x, copy(df), cols=:) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8]) + a1, a2, b = eachcol(df) + mapcols!(x -> 2x, df, cols=Not(All())) + @test df == DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4]) + @test df.a1 === a1 && df.a2 === a2 && df.b === b end @testset "SubDataFrame" begin diff --git a/test/select.jl b/test/select.jl index 33e627300..67f97df2f 100644 --- a/test/select.jl +++ b/test/select.jl @@ -2853,7 +2853,7 @@ end DataFrame(x=[missing, missing, missing]) @test combine(table, AsTable([p, q]) => ByRow(median∘skipmissing) => :x) == DataFrame(x=[1, 2, 4]) - # a bit surpriting how non-broadcasted and broadcasted minimum works + # a bit surprising how non-broadcasted and broadcasted minimum works @test combine(table, AsTable([p, q]) => minimum => :x) == DataFrame(x=[1, 2, 4]) @test combine(table, AsTable([p, q]) => ByRow(minimum) => :x) ≅