Skip to content

Commit

Permalink
fix nonunique bug
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins committed Oct 20, 2023
1 parent 1a5da8a commit 27be194
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/abstractdataframe/unique.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ function nonunique(df::AbstractDataFrame; keep::Symbol=:first)
if !(keep in (:first, :last, :noduplicates))
throw(ArgumentError("`keep` must be :first, :last, or :noduplicates"))
end
ncol(df) == 0 && return Bool[]
nrow(df) == 0 && return Bool[]
res = fill(true, nrow(df))
cols = ntuple(i -> df[!, i], ncol(df))
if keep == :first
Expand Down
6 changes: 5 additions & 1 deletion src/groupeddataframe/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,11 @@ function row_group_slots!(cols::NTuple{N, AbstractVector},
nt = max(1, lg ÷ 100_000)
end
# if there are few rows per group limit the number of threads used
nt = clamp(round(Int, (lg / 4) / ngroups - 2), 1, nt)
if ngroups == 0
nt = 1

Check warning on line 341 in src/groupeddataframe/utils.jl

View check run for this annotation

Codecov / codecov/patch

src/groupeddataframe/utils.jl#L341

Added line #L341 was not covered by tests
else
nt = clamp(round(Int, (lg / 4) / ngroups - 2), 1, nt)
end

seen = fill(false, ngroups)
seen_vec = Vector{Vector{Bool}}(undef, nt)
Expand Down
9 changes: 6 additions & 3 deletions test/duplicates.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module TestDuplicates

using Test, DataFrames, CategoricalArrays, Random
using Test, DataFrames, CategoricalArrays, Random, PooledArrays
const ≅ = isequal

@testset "nonunique" begin
Expand Down Expand Up @@ -30,15 +30,18 @@ const ≅ = isequal
@test_throws ArgumentError unique!(df)
@test_throws ArgumentError unique(df, true)

pdf = view(DataFrame(a=CategoricalArray(["a", "a", missing, missing, "b", missing, "a", missing]),
b=CategoricalArray(["a", "b", missing, missing, "b", "a", "a", "a"])), :, :)
pdf = view(DataFrame(a=CategoricalArray(["a", "a", missing, missing, "b", missing, "a", missing]),
b=CategoricalArray(["a", "b", missing, missing, "b", "a", "a", "a"])), :, :)
updf = DataFrame(a=CategoricalArray(["a", "a", missing, "b", missing]),
b=CategoricalArray(["a", "b", missing, "b", "a"]))
@test nonunique(pdf) == [false, false, false, true, false, false, true, true]
@test nonunique(updf) == falses(5)
@test updf ≅ unique(pdf)
@test_throws ArgumentError unique!(pdf)
@test_throws ArgumentError unique(pdf, true)

@test isempty(nonunique(DataFrame(a=PooledArray(Int[]))))
@test typeof(nonunique(DataFrame(a=PooledArray(Int[])))) === Vector{Bool}
end

@testset "nonunique, nonunique, unique! with extra argument" begin
Expand Down

0 comments on commit 27be194

Please sign in to comment.