Skip to content

Commit

Permalink
Merge pull request #42 from eliascarv/replace
Browse files Browse the repository at this point in the history
Add Replace
  • Loading branch information
eliascarv authored Apr 13, 2022
2 parents af0128a + 902944c commit ee9f36e
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ Please check the docstrings for additional information.
| `Filter` | Row filtering |
| `DropMissing` | Drop missings |
| `Rename` | Column renaming |
| `Replace` | Replace values |
| `Coalesce` | Replace missings |
| `Coerce` | Coerce scientific types |
| `Identity` | Identity transform |
Expand Down
1 change: 1 addition & 0 deletions src/TableTransforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export
Filter,
DropMissing,
Rename,
Replace,
Coalesce,
Coerce,
Identity,
Expand Down
1 change: 1 addition & 0 deletions src/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ end
include("transforms/select.jl")
include("transforms/filter.jl")
include("transforms/rename.jl")
include("transforms/replace.jl")
include("transforms/coalesce.jl")
include("transforms/coerce.jl")
include("transforms/identity.jl")
Expand Down
28 changes: 28 additions & 0 deletions src/transforms/replace.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ------------------------------------------------------------------
# Licensed under the MIT License. See LICENSE in the project root.
# ------------------------------------------------------------------

"""
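    Replace(old₁ => new₁, old₂ => new₂, ..., oldₙ => newₙ)

Replaces each `oldᵢ` value with the corresponding `newᵢ` value in the table.
Values are matched by identity (`===`), so, for example, `1` does not match `1.0`.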
"""
struct Replace{K,V} <: Colwise
# lookup table from each old value to its replacement; an IdDict
# compares keys by identity (`===`) rather than by `==`
pairs::IdDict{K,V}
end

# A transform without any `old => new` pair is rejected at construction time.
function Replace()
  throw(ArgumentError("Cannot create a Replace object without arguments."))
end

# Collect the `old => new` pairs into an identity-keyed dictionary and
# forward it to the default constructor.
function Replace(pairs::Pair...)
  # `pairs` is the tuple of varargs; it is already an iterable of pairs
  mapping = IdDict(pairs)
  Replace(mapping)
end

# Replace declares itself revertible.
function isrevertible(::Type{<:Replace})
  true
end

# Build the revert cache for column `x`: a dictionary mapping the index of
# every element that is about to be replaced to its original value.
#
# The column is scanned once. Membership in the `IdDict` is decided by
# identity (`===`), i.e. the same criterion used when the replacement is
# applied. An empty mapping or a column without occurrences yields an
# empty cache.
function colcache(transform::Replace, x)
  olds = transform.pairs
  # keys come from `pairs(x)`, the same indices that `findall` reports
  cache = Dict{eltype(keys(x)),eltype(x)}()
  for (i, v) in pairs(x)
    if haskey(olds, v)
      cache[i] = v
    end
  end
  cache
end

# Produce the transformed column: every element that is a key of the
# mapping is swapped for its replacement, all other elements pass through
# untouched. The cache `c` is not needed here.
function colapply(transform::Replace, x, c)
  lookup = transform.pairs
  collect(get(lookup, v, v) for v in x)
end

# Undo the replacement on column `x` using the cache `c`, which maps the
# index of each replaced element to its original value. Positions absent
# from the cache were never modified and are copied as they are.
#
# Iterating `keys(x)` (instead of `1:length(x)`) visits exactly the index
# set that `findall`/`pairs` use for `x`, so lookups in the cache stay
# consistent even for columns whose indices do not start at 1.
function colrevert(transform::Replace, x, c)
  [get(c, i, x[i]) for i in keys(x)]
end
85 changes: 84 additions & 1 deletion test/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,89 @@
@test n1 == n2
end

# Tests for the Replace transform: applying the replacement, reverting it,
# and the column types reported by the table schema before and after.
@testset "Replace" begin
a = [3, 2, 1, 4, 5, 3]
b = [2, 4, 4, 5, 8, 5]
c = [1, 1, 6, 2, 4, 1]
d = [4, 3, 7, 5, 4, 1]
e = [5, 5, 2, 6, 5, 2]
f = [4, 4, 3, 4, 5, 2]
t = Table(; a, b, c, d, e, f)

# replace with a value of the same type
# NOTE: from here on `c` is rebound to the cache returned by `apply`;
# the column vector `c` above is already stored in the table `t`
T = Replace(1 => -1, 5 => -5)
n, c = apply(T, t)
@test n.a == [3, 2, -1, 4, -5, 3]
@test n.b == [2, 4, 4, -5, 8, -5]
@test n.c == [-1, -1, 6, 2, 4, -1]
@test n.d == [4, 3, 7, -5, 4, -1]
@test n.e == [-5, -5, 2, 6, -5, 2]
@test n.f == [4, 4, 3, 4, -5, 2]
@test isrevertible(T) == true
tₒ = revert(T, n, c)
@test t == tₒ

# table schema after apply and revert
# (same-type replacements must leave the column types unchanged)
T = Replace(1 => -1, 5 => -5)
n, c = apply(T, t)
types = Tables.schema(t).types
@test types == Tables.schema(n).types
tₒ = revert(T, n, c)
@test types == Tables.schema(tₒ).types

# replace with a value of another type
# (integer columns receive Float64 and Bool replacements)
T = Replace(1 => 1.5, 5 => 5.5, 4 => true)
n, c = apply(T, t)
@test n.a == Real[3, 2, 1.5, true, 5.5, 3]
@test n.b == Real[2, true, true, 5.5, 8, 5.5]
@test n.c == Real[1.5, 1.5, 6, 2, true, 1.5]
@test n.d == Real[true, 3, 7, 5.5, true, 1.5]
@test n.e == Real[5.5, 5.5, 2, 6, 5.5, 2]
@test n.f == Real[true, true, 3, true, 5.5, 2]
tₒ = revert(T, n, c)
@test t == tₒ

# table schema after apply and revert
# (every transformed column is expected to have type `Real`, and the
# reverted table must report the original types again)
T = Replace(1 => 1.5, 5 => 5.5, 4 => true)
n, c = apply(T, t)
tₒ = revert(T, n, c)
ttypes = Tables.schema(t).types
ntypes = Tables.schema(n).types
@test ntypes[1] == Real
@test ntypes[2] == Real
@test ntypes[3] == Real
@test ntypes[4] == Real
@test ntypes[5] == Real
@test ntypes[6] == Real
@test ttypes == Tables.schema(tₒ).types

# no occurrences
# (none of the old values appear in the table, so apply is a no-op)
T = Replace(10 => 11, 20 => 30)
n, c = apply(T, t)
@test t == n
tₒ = revert(T, n, c)
@test t == tₒ

# columns with different types
a = [3, 2, 1, 4, 5, 3]
b = [2.5, 4.5, 4.7, 2.5, 2.5, 5.3]
c = [true, false, false, false, true, false]
d = ['a', 'b', 'c', 'd', 'e', 'a']
t = Table(; a, b, c, d)

# one pair per column type (Int, Float64, Bool, Char)
T = Replace(3 => -3, 2.5 => 2.0, true => false, 'a' => 'A')
n, c = apply(T, t)
@test n.a == [-3, 2, 1, 4, 5, -3]
@test n.b == [2.0, 4.5, 4.7, 2.0, 2.0, 5.3]
@test n.c == [false, false, false, false, false, false]
@test n.d == ['A', 'b', 'c', 'd', 'e', 'A']
tₒ = revert(T, n, c)
@test t == tₒ

# throws
# (constructing the transform without any pair is an error)
@test_throws ArgumentError Replace()
end

@testset "Coalesce" begin
a = [3, 2, missing, 4, 5, 3]
b = [missing, 4, 4, 5, 8, 5]
Expand Down Expand Up @@ -672,7 +755,7 @@
@test ntypes[6] == Int
@test ttypes == Tables.schema(tₒ).types
end

@testset "Coerce" begin
x1 = [1.0, 2.0, 3.0, 4.0, 5.0]
x2 = [1.0, 2.0, 3.0, 4.0, 5.0]
Expand Down

0 comments on commit ee9f36e

Please sign in to comment.