Skip to content

Commit

Permalink
Merge pull request #37 from ceferisbarov/coerce
Browse files Browse the repository at this point in the history
Add Coerce
  • Loading branch information
juliohm authored Apr 13, 2022
2 parents d1fd5ec + f196d3a commit af0128a
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ Please check the docstrings for additional information.
| `DropMissing` | Drop missings |
| `Rename` | Column renaming |
| `Coalesce` | Replace missings |
| `Coerce` | Coerce scientific types |
| `Identity` | Identity transform |
| `Center` | Mean removal |
| `Scale` | Interval scaling |
Expand Down
1 change: 1 addition & 0 deletions src/TableTransforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export
DropMissing,
Rename,
Coalesce,
Coerce,
Identity,
Center,
Scale,
Expand Down
1 change: 1 addition & 0 deletions src/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ include("transforms/select.jl")
include("transforms/filter.jl")
include("transforms/rename.jl")
include("transforms/coalesce.jl")
include("transforms/coerce.jl")
include("transforms/identity.jl")
include("transforms/center.jl")
include("transforms/scale.jl")
Expand Down
48 changes: 48 additions & 0 deletions src/transforms/coerce.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# ------------------------------------------------------------------
# Licensed under the MIT License. See LICENSE in the project root.
# ------------------------------------------------------------------

"""
    Coerce(pairs...; tight=false, verbosity=1)

Return a copy of the table, ensuring that the scientific types of the columns
match the new specification given by `pairs`, where each pair has the form
`:column => ScientificType`.

This transform wraps the `ScientificTypes.coerce` function. The `tight` and
`verbosity` options are forwarded to it as keyword arguments. Please see its
docstring for more details.

# Examples

```julia
Coerce(:col1 => Continuous, :col2 => Count)
Coerce(:col1 => Continuous, :col2 => Count; tight=true)
```
"""
struct Coerce{P} <: Transform
# column => scientific type pairs, splatted into `coerce` by `apply`
pairs::P
# forwarded to `coerce` as the `tight` keyword argument
tight::Bool
# forwarded to `coerce` as the `verbosity` keyword argument
verbosity::Int
end

# Convenience constructor: the `:column => ScientificType` pairs given as
# positional arguments are gathered into a tuple and stored together with
# the keyword options.
function Coerce(pair::Pair{Symbol,<:Type}...; tight=false, verbosity=1)
  return Coerce(pair, tight, verbosity)
end

# `Coerce` provides a `revert` method, so it is flagged as revertible.
function isrevertible(::Type{<:Coerce})
  return true
end

# Coerce the columns of `table` according to the pairs stored in `transform`.
# Returns the coerced table together with the column types taken from the
# schema of the input table, which `revert` receives as its cache.
function apply(transform::Coerce, table)
  options = (tight=transform.tight, verbosity=transform.verbosity)
  coerced = coerce(table, transform.pairs...; options...)

  origtypes = Tables.schema(table).types

  return coerced, origtypes
end

# Undo a `Coerce` transform: every column of `newtable` is collected back
# into the element type stored at the same position in `cache`, and the
# result is materialized with the materializer of `newtable`.
function revert(transform::Coerce, newtable, cache)
  # `Tables.columnnames` is part of the column-access interface: it must be
  # called on the object returned by `Tables.columns`, not on the table
  # itself. Calling it on a row-oriented table falls back to
  # `propertynames` and yields the wrong names.
  cols = Tables.columns(newtable)
  names = Tables.columnnames(cols)

  oldcols = map(zip(cache, names)) do (T, n)
    x = Tables.getcolumn(cols, n)
    collect(T, x)
  end

  𝒯 = (; zip(names, oldcols)...)
  return 𝒯 |> Tables.materializer(newtable)
end

2 changes: 2 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
ImageIO = "82e4d734-157c-48bb-816b-45c225c6df19"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReferenceTests = "324d217c-45ce-50fc-942e-d289b448e8cf"
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
2 changes: 2 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ using TableTransforms
using Distributions
using Tables
using TypedTables
using CategoricalArrays
using ScientificTypes: Count, Multiclass
using LinearAlgebra
using Statistics
using Test, Random, Plots
Expand Down
25 changes: 25 additions & 0 deletions test/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,31 @@
@test ttypes == Tables.schema(tₒ).types
end

@testset "Coerce" begin
  x1 = [1.0, 2.0, 3.0, 4.0, 5.0]
  x2 = [1.0, 2.0, 3.0, 4.0, 5.0]
  x3 = [5.0, 5.0, 5.0, 5.0, 5.0]
  t = Table(; x1, x2, x3)

  # coercion to Count: element types AND values are checked, and the
  # column that is not listed in the pairs must be left untouched
  T = Coerce(:x1 => Count, :x2 => Count)
  n, c = apply(T, t)
  @test eltype(n.x1) == Int
  @test eltype(n.x2) == Int
  @test n.x1 == x1
  @test n.x2 == x2
  @test eltype(n.x3) == eltype(t.x3)
  @test n.x3 == x3

  # revert must restore both the element types and the values
  tₒ = revert(T, n, c)
  @test eltype(tₒ.x1) == eltype(t.x1)
  @test eltype(tₒ.x2) == eltype(t.x2)
  @test tₒ.x1 == t.x1
  @test tₒ.x2 == t.x2
  @test tₒ.x3 == t.x3

  # coercion to Multiclass produces categorical columns
  T = Coerce(:x1 => Multiclass, :x2 => Multiclass)
  n, c = apply(T, t)
  @test eltype(n.x1) <: CategoricalValue
  @test eltype(n.x2) <: CategoricalValue
  @test eltype(n.x3) == eltype(t.x3)

  # revert must unwrap the categorical values back to the original data
  tₒ = revert(T, n, c)
  @test eltype(tₒ.x1) == eltype(t.x1)
  @test eltype(tₒ.x2) == eltype(t.x2)
  @test tₒ.x1 == t.x1
  @test tₒ.x2 == t.x2
  @test tₒ.x3 == t.x3
end

@testset "Identity" begin
x = rand(4000)
y = rand(4000)
Expand Down

0 comments on commit af0128a

Please sign in to comment.