From 27be1948ad0ffbbc522444f51cda3ff5b7a3f162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 20 Oct 2023 11:44:50 +0200 Subject: [PATCH] fix nonunique bug --- src/abstractdataframe/unique.jl | 2 +- src/groupeddataframe/utils.jl | 6 +++++- test/duplicates.jl | 9 ++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/abstractdataframe/unique.jl b/src/abstractdataframe/unique.jl index fd94caac66..9441b825c0 100644 --- a/src/abstractdataframe/unique.jl +++ b/src/abstractdataframe/unique.jl @@ -87,7 +87,7 @@ function nonunique(df::AbstractDataFrame; keep::Symbol=:first) if !(keep in (:first, :last, :noduplicates)) throw(ArgumentError("`keep` must be :first, :last, or :noduplicates")) end - ncol(df) == 0 && return Bool[] + nrow(df) == 0 && return Bool[] res = fill(true, nrow(df)) cols = ntuple(i -> df[!, i], ncol(df)) if keep == :first diff --git a/src/groupeddataframe/utils.jl b/src/groupeddataframe/utils.jl index 1a58b7ec44..0e4b1d9840 100644 --- a/src/groupeddataframe/utils.jl +++ b/src/groupeddataframe/utils.jl @@ -337,7 +337,11 @@ function row_group_slots!(cols::NTuple{N, AbstractVector}, nt = max(1, lg ÷ 100_000) end # if there are few rows per group limit the number of threads used - nt = clamp(round(Int, (lg / 4) / ngroups - 2), 1, nt) + if ngroups == 0 + nt = 1 + else + nt = clamp(round(Int, (lg / 4) / ngroups - 2), 1, nt) + end seen = fill(false, ngroups) seen_vec = Vector{Vector{Bool}}(undef, nt) diff --git a/test/duplicates.jl b/test/duplicates.jl index 61c01874d2..d8c264962b 100644 --- a/test/duplicates.jl +++ b/test/duplicates.jl @@ -1,6 +1,6 @@ module TestDuplicates -using Test, DataFrames, CategoricalArrays, Random +using Test, DataFrames, CategoricalArrays, Random, PooledArrays const ≅ = isequal @testset "nonunique" begin @@ -30,8 +30,8 @@ const ≅ = isequal @test_throws ArgumentError unique!(df) @test_throws ArgumentError unique(df, true) - pdf = view(DataFrame(a=CategoricalArray(["a", "a", missing, missing, "b", missing, "a", missing]), - b=CategoricalArray(["a", "b", missing, missing, "b", "a", "a", "a"])), :, :) + pdf = view(DataFrame(a=CategoricalArray(["a", "a", missing, missing, "b", missing, "a", missing]), + b=CategoricalArray(["a", "b", missing, missing, "b", "a", "a", "a"])), :, :) updf = DataFrame(a=CategoricalArray(["a", "a", missing, "b", missing]), b=CategoricalArray(["a", "b", missing, "b", "a"])) @test nonunique(pdf) == [false, false, false, true, false, false, true, true] @@ -39,6 +39,9 @@ const ≅ = isequal @test updf ≅ unique(pdf) @test_throws ArgumentError unique!(pdf) @test_throws ArgumentError unique(pdf, true) + + @test isempty(nonunique(DataFrame(a=PooledArray(Int[])))) + @test typeof(nonunique(DataFrame(a=PooledArray(Int[])))) === Vector{Bool} end @testset "nonunique, nonunique, unique! with extra argument" begin