JuliaData · bkamins · Oct 17, 2023 · Oct 10, 2023 · Oct 10, 2023 · Oct 10, 2023
diff --git a/NEWS.md b/NEWS.md
@@ -9,6 +9,10 @@
   column names only to a subset of the columns specified by the `cols`
   keyword argument
   ([#3380](https://github.com/JuliaData/DataFrames.jl/pull/3380))
+* `mapcols` and `mapcols!` now allow applying a column-transforming
+  function only to the subset of columns specified by the `cols`
+  keyword argument
+  ([#3386](https://github.com/JuliaData/DataFrames.jl/pull/3386))
 
 ## Bug fixes
 

diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl
@@ -107,20 +107,20 @@ as a `DataFrameRows` over a view of rows of parent of `dfr`.
 julia> collect(Iterators.partition(eachrow(DataFrame(x=1:5)), 2))
 3-element Vector{DataFrames.DataFrameRows{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}}:
  2×1 DataFrameRows
- Row │ x     
-     │ Int64 
+ Row │ x
+     │ Int64
 ─────┼───────
    1 │     1
    2 │     2
  2×1 DataFrameRows
- Row │ x     
-     │ Int64 
+ Row │ x
+     │ Int64
 ─────┼───────
    1 │     3
    2 │     4
  1×1 DataFrameRows
- Row │ x     
-     │ Int64 
+ Row │ x
+     │ Int64
 ─────┼───────
    1 │     5
 ```
@@ -408,12 +408,17 @@ Base.show(dfcs::DataFrameColumns;
          summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)
 
 """
-    mapcols(f::Union{Function, Type}, df::AbstractDataFrame)
+    mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All())
+
+Return a `DataFrame` where each column of `df` selected by `cols` (by default, all columns)
+is transformed using function `f`.
+Columns not selected by `cols` are copied.
 
-Return a `DataFrame` where each column of `df` is transformed using function `f`.
 `f` must return `AbstractVector` objects all with the same length or scalars
 (all values other than `AbstractVector` are considered to be a scalar).
 
+The `cols` column selector can be any value accepted as a column selector by the `names` function.
+
 Note that `mapcols` guarantees not to reuse the columns from `df` in the returned
 `DataFrame`. If `f` returns its argument then it gets copied before being stored.
 
@@ -440,15 +445,32 @@ julia> mapcols(x -> x.^2, df)
    2 │     4    144
    3 │     9    169
    4 │    16    196
+
+julia> mapcols(x -> x.^2, df, cols=r"y")
+4×2 DataFrame
+ Row │ x      y
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1    121
+   2 │     2    144
+   3 │     3    169
+   4 │     4    196
 ```
 """
-function mapcols(f::Union{Function, Type}, df::AbstractDataFrame)
+function mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All())
+    if cols === All() || cols === Colon()
+        apply = Iterators.repeated(true)
+    else
+        picked = Set(names(df, cols))
+        apply = Bool[name in picked for name in names(df)]
+    end
+
     # note: `f` must return a consistent length
     vs = AbstractVector[]
     seenscalar = false
     seenvector = false
-    for v in eachcol(df)
-        fv = f(v)
+    for (v, doapply) in zip(eachcol(df), apply)
+        fv = doapply ? f(v) : copy(v)
         if fv isa AbstractVector
             if seenscalar
                 throw(ArgumentError("mixing scalars and vectors in mapcols not allowed"))
@@ -470,9 +492,12 @@ function mapcols(f::Union{Function, Type}, df::AbstractDataFrame)
 end
 
 """
-    mapcols!(f::Union{Function, Type}, df::DataFrame)
+    mapcols!(f::Union{Function, Type}, df::DataFrame; cols=All())
+
+Update `df` in-place by transforming each column selected by `cols` (by default, all columns)
+using function `f`.
+Columns not selected by `cols` are left unchanged. The `cols` column selector can be any value accepted as a column selector by the `names` function.
 
-Update a `DataFrame` in-place where each column of `df` is transformed using function `f`.
 `f` must return `AbstractVector` objects all with the same length or scalars
 (all values other than `AbstractVector` are considered to be a scalar).
 
@@ -503,20 +528,39 @@ julia> df
    2 │     4    144
    3 │     9    169
    4 │    16    196
+
+julia> mapcols!(x -> 2 * x, df, cols=r"x");
+
+julia> df
+4×2 DataFrame
+ Row │ x      y
+     │ Int64  Int64
+─────┼──────────────
+   1 │     2    121
+   2 │     8    144
+   3 │    18    169
+   4 │    32    196
 ```
 """
-function mapcols!(f::Union{Function, Type}, df::DataFrame)
-    # note: `f` must return a consistent length
+function mapcols!(f::Union{Function,Type}, df::DataFrame; cols=All())
     if ncol(df) == 0 # skip if no columns
         _drop_all_nonnote_metadata!(df)
         return df
     end
 
+    if cols === All() || cols === Colon()
+        apply = Iterators.repeated(true)
+    else
+        picked = Set(names(df, cols))
+        apply = Bool[name in picked for name in names(df)]
+    end
+
+    # note: `f` must return a consistent length
     vs = AbstractVector[]
     seenscalar = false
     seenvector = false
-    for v in eachcol(df)
-        fv = f(v)
+    for (v, doapply) in zip(eachcol(df), apply)
+        fv = doapply ? f(v) : v
         if fv isa AbstractVector
             if seenscalar
                 throw(ArgumentError("mixing scalars and vectors in mapcols not allowed"))

diff --git a/test/iteration.jl b/test/iteration.jl
@@ -78,6 +78,19 @@ end
     df = mapcols(x -> 2:2, df)
     @test df == DataFrame(a=2)
     @test df.a isa Vector{Int}
+
+    df = DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
+    @test mapcols(x -> 2x, df, cols=r"a") == DataFrame(a1=[2, 4], a2=[4, 6], b=[3, 4])
+    @test mapcols(x -> 2x, df, cols="b") == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8])
+    @test mapcols(x -> 2x, df, cols=Not(r"a")) == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8])
+    @test mapcols(x -> 2x, df, cols=Int) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8])
+    @test mapcols(x -> 2x, df, cols=Not(All())) == DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
+    @test mapcols(x -> 2x, df, cols=:) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8])
+
+    df2 = mapcols(x -> 2x, df, cols="b")
+    @test df2.a1 == df.a1 && df2.a1 !== df.a1
+    @test df2.a2 == df.a2 && df2.a2 !== df.a2
+    @test df2.b == 2*df.b
 end
 
 @testset "mapcols!" begin
@@ -109,6 +122,18 @@ end
     mapcols!(x -> 2:2, df)
     @test df == DataFrame(a=2)
     @test df.a isa Vector{Int}
+
+    df = DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
+    @test mapcols!(x -> 2x, copy(df), cols=r"a") == DataFrame(a1=[2, 4], a2=[4, 6], b=[3, 4])
+    @test mapcols!(x -> 2x, copy(df), cols="b") == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8])
+    @test mapcols!(x -> 2x, copy(df), cols=Not(r"a")) == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8])
+    @test mapcols!(x -> 2x, copy(df), cols=Int) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8])
+    @test mapcols!(x -> 2x, copy(df), cols=Not(All())) == DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
+    @test mapcols!(x -> 2x, copy(df), cols=:) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8])
+    a1, a2, b = eachcol(df)
+    mapcols!(x -> 2x, df, cols=Not(All()))
+    @test df == DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
+    @test df.a1 === a1 && df.a2 === a2 && df.b === b
 end
 
 @testset "SubDataFrame" begin