Skip to content

Commit

Permalink
add findcols
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins committed Oct 18, 2023
1 parent 1a5da8a commit 524e034
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 0 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
columns only to a subset of the columns specified by the `cols`
keyword argument
([#3386](https://github.com/JuliaData/DataFrames.jl/pull/3386))
* add `findcols` that returns a vector of integer indices of the columns
of a data frame for which the passed predicate function returns `true`
([#3389](https://github.com/JuliaData/DataFrames.jl/pull/3389))

## Bug fixes

Expand Down
1 change: 1 addition & 0 deletions src/DataFrames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ export AbstractDataFrame,
dropmissing!,
dropmissing,
fillcombinations,
findcols,
flatten,
groupby,
groupindices,
Expand Down
26 changes: 26 additions & 0 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3252,3 +3252,29 @@ function Base.iterate(itr::Iterators.PartitionIterator{<:AbstractDataFrame}, sta
r = min(state + itr.n - 1, last_idx)
return view(itr.c, state:r, :), r + 1
end

"""
findall(f, df::AbstractDataFrame)
Return an integer vector `I` of the column indices `i` of `df` where `f(df[:, i])` returns `true`.
If there are no such columns of `df`, return `Int[]`.
# Examples
```jldoctest

Check failure on line 3264 in src/abstractdataframe/abstractdataframe.jl

View workflow job for this annotation

GitHub Actions / Documentation

doctest failure in ~/work/DataFrames.jl/DataFrames.jl/src/abstractdataframe/abstractdataframe.jl:3264-3277 ```jldoctest julia> df = DataFrame(a=[1, missing], b=[2, 3], c=[missing, 4]) 2×3 DataFrame Row │ a b c │ Int64? Int64 Int64? ─────┼───────────────────────── 1 │ 1 2 missing 2 │ missing 3 4 julia> findcols(x -> any(ismissing, x), df) 2-element Vector{Int64}: 1 3 ``` Subexpression: df = DataFrame(a=[1, missing], b=[2, 3], c=[missing, 4]) Evaluated output: 2×3 DataFrame Row │ a b c │ Int64? Int64 Int64? ─────┼───────────────────────── 1 │ 1 2 missing 2 │ missing 3 4 Expected output: 2×3 DataFrame Row │ a b c │ Int64? Int64 Int64? ─────┼───────────────────────── 1 │ 1 2 missing 2 │ missing 3 4 julia> findcols(x -> any(ismissing, x), df) 2-element Vector{Int64}: 1 3 diff = Warning: Diff output requires color. 2×3 DataFrame Row │ a b c │ Int64? Int64 Int64? ─────┼───────────────────────── 1 │ 1 2 missing 2 │ missing 3 4 julia> findcols(x -> any(ismissing, x), df) 2-element Vector{Int64}: 1 34
julia> df = DataFrame(a=[1, missing], b=[2, 3], c=[missing, 4])
2×3 DataFrame
Row │ a b c
│ Int64? Int64 Int64?
─────┼─────────────────────────
1 │ 1 2 missing
2 │ missing 3 4
julia> findcols(x -> any(ismissing, x), df)
2-element Vector{Int64}:
1
3
```
"""
findcols(f::Function, df::AbstractDataFrame) =
findall(f, eachcol(df))
13 changes: 13 additions & 0 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2387,4 +2387,17 @@ end
@test eltype(collect(p)) <: DataFrames.DataFrameRows
end

@testset "findcols" begin
    # Predicates shared by the data frame and the view checks below.
    hasmissing = col -> any(ismissing, col)
    always = col -> true
    never = col -> false
    nonbool = col -> 1  # returns an Int rather than a Bool

    # Columns `a` and `c` contain a missing value, `b` does not.
    df = DataFrame(a=[1, missing], b=[2, 3], c=[missing, 4])
    @test findcols(hasmissing, df) == [1, 3]
    @test findcols(always, df) == [1, 2, 3]
    @test findcols(never, df) == Int[]
    @test_throws TypeError findcols(nonbool, df)

    # Same checks on a view that keeps only the first two columns.
    sdf = view(df, :, [1, 2])
    @test findcols(hasmissing, sdf) == [1]
    @test findcols(always, sdf) == [1, 2]
    @test findcols(never, sdf) == Int[]
    @test_throws TypeError findcols(nonbool, sdf)
end

end # module

0 comments on commit 524e034

Please sign in to comment.