Skip to content

Commit

Permalink
Merge pull request #70 from AlgebraicJulia/serialization_interface
Browse files Browse the repository at this point in the history
Generic interface for reading an acset
  • Loading branch information
epatters authored Oct 12, 2023
2 parents ba54732 + d9d484d commit 1f54cc0
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 75 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0"
nauty_jll = "55c6dc9b-343a-50ca-8ff2-b71adb3733d5"

[extensions]
ExcelACSets = "XLSX"
NautyACSetsExt = "nauty_jll"
XLSXACSetsExt = "XLSX"

[compat]
AlgebraicInterfaces = "0.1"
Expand Down
51 changes: 6 additions & 45 deletions ext/ExcelACSets.jl → ext/XLSXACSetsExt.jl
Original file line number Diff line number Diff line change
@@ -1,55 +1,16 @@
""" Read acsets from Microsoft Excel files.
"""
module ExcelACSets
module XLSXACSetsExt

import Tables, XLSX
using ACSets

# Excel spec
############

const AbstractMap = Union{AbstractDict,NamedTuple}

@kwdef struct ExcelTableSpec
sheet::Union{AbstractString,Integer,Missing} = missing
primary_key::Union{Symbol,Missing} = missing
row_range::Union{AbstractUnitRange,Integer,Missing} = missing
column_range::Union{AbstractString,Missing} = missing
column_labels::AbstractMap = (;)
convert::AbstractMap = (;)
end

@kwdef struct ExcelSpec
tables::AbstractDict{Symbol,ExcelTableSpec} = Dict{Symbol,ExcelTableSpec}()
end

function ExcelSpec(schema::Schema; tables::AbstractMap=(;), kw...)
table_specs = Dict(ob => ExcelTableSpec(; get(tables, ob, (;))...)
for ob in objects(schema))
ExcelSpec(; tables=table_specs, kw...)
end

# Read from spec
################

""" Read acset from an Excel (.xlsx) file.
using ACSets
using ACSets.ACSetSerialization.ExcelACSets: ExcelSpec, ExcelTableSpec

# Arguments
- `source`: filename or IO stream from which to read Excel file
- `cons`: constructor for acset, e.g., the acset type for struct acsets
- `tables=(;)`: dictionary or named tuple mapping object names in acset schema
to Excel table specifications
"""
function ACSets.read_xlsx_acset(source::Union{AbstractString,IO}, cons; kw...)
read_acset(XLSX.readxlsx(source), cons; kw...)
end

# TODO: Define and export generic functions `read_acset` and `read_acset!`.
function read_acset(xf::XLSX.XLSXFile, cons; kw...)
read_acset!(xf, cons(); kw...)
function ExcelACSets.read_xlsx(source::Union{AbstractString,IO})
XLSX.readxlsx(source)
end

function read_acset!(xf::XLSX.XLSXFile, acs::ACSet; kw...)
function ACSets.read_acset!(acs::ACSet, xf::XLSX.XLSXFile; kw...)
# Read table for each object.
schema = acset_schema(acs)
spec = ExcelSpec(schema; kw...)
Expand Down
53 changes: 53 additions & 0 deletions src/serialization/ExcelACSets.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
""" Read acsets from Microsoft Excel files.

This module contains the part of the Excel reader that does not depend on
XLSX.jl: the specification types and the convenience function
[`read_xlsx_acset`](@ref). The function `read_xlsx` is only declared here,
without methods.
"""
module ExcelACSets
export read_xlsx_acset

using ...ACSetInterface, ...Schemas, ..ACSetSerialization

# Data types
############

""" Dictionary-like containers accepted for keyword-style options.
"""
const AbstractMap = Union{AbstractDict,NamedTuple}

""" Specification of how a single table is read from an Excel file.

Every field has a default value, so a specification can be constructed from any
subset of the keywords.

NOTE(review): the precise meaning of each field is fixed by the code that
consumes the specification, which lives outside this module; confirm it there.
"""
@kwdef struct ExcelTableSpec
  sheet::Union{AbstractString,Integer,Missing} = missing
  primary_key::Union{Symbol,Missing} = missing
  row_range::Union{AbstractUnitRange,Integer,Missing} = missing
  column_range::Union{AbstractString,Missing} = missing
  column_labels::AbstractMap = (;)
  convert::AbstractMap = (;)
end

""" Specification for reading an acset from an Excel file.

Holds one [`ExcelTableSpec`](@ref) per table, keyed by a symbol.
"""
@kwdef struct ExcelSpec
  tables::AbstractDict{Symbol,ExcelTableSpec} = Dict{Symbol,ExcelTableSpec}()
end

""" Construct an Excel specification with one table spec per object of a schema.

For each object `ob` of `schema`, the entry `tables[ob]`, if present, is splatted
as keyword arguments into [`ExcelTableSpec`](@ref); objects without an entry get
the default table specification. All remaining keyword arguments are forwarded
to the keyword constructor of `ExcelSpec`.
"""
function ExcelSpec(schema::Schema; tables::AbstractMap=(;), kw...)
  # Fix the key and value types explicitly. An untyped `Dict(generator)` relies
  # on type inference and may yield a `Dict{Any,Any}`, e.g. when the schema has
  # no objects, which does not match the type of the `tables` field.
  table_specs = Dict{Symbol,ExcelTableSpec}(
    ob => ExcelTableSpec(; get(tables, ob, (;))...) for ob in objects(schema))
  ExcelSpec(; tables=table_specs, kw...)
end

# Interface
###########

""" Read acset from an Excel (.xlsx) file.

This is a convenience function that loads the Excel file and then calls
[`read_acset`](@ref). To use this function, the package XLSX.jl must be
installed and imported.

# Arguments
- `cons`: constructor for acset, e.g., the acset type for struct acsets
- `source`: filename or IO stream from which to read Excel file
- `tables=(;)`: dictionary or named tuple mapping object names in acset schema
  to Excel table specifications
"""
function read_xlsx_acset(cons, source::Union{AbstractString,IO}; kw...)
  read_acset(cons, read_xlsx(source); kw...)
end

""" Load an Excel file from a filename or IO stream.

Only the generic function is declared here; it has no methods in this module.
A method becomes available once the package XLSX.jl is installed and imported.
"""
function read_xlsx end

end
42 changes: 22 additions & 20 deletions src/serialization/JSONACSets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@ import Tables
using ...ACSetInterface, ...Schemas, ...DenseACSets
using ...DenseACSets: attr_type
using ...ColumnImplementations: AttrVar # TODO: Move this.
import ..ACSetSerialization: read_acset!

# ACSet serialization
#####################

read_acset!(cons, source::AbstractDict) = parse_json_acset!(cons, source)

""" Generate JSON-able object representing an ACSet.
Inverse to [`parse_json_acset`](@ref).
Expand All @@ -40,40 +43,39 @@ attr_to_json(val) = val
Inverse to [`generate_json_acset`](@ref).
"""
parse_json_acset(::Type{T}, input::AbstractDict) where {T<:StructACSet} =
_parse_json_acset(T, input)
parse_json_acset(d::DynamicACSet, input::AbstractDict) =
_parse_json_acset(constructor(d), input)

function _parse_json_acset(cons, input::AbstractDict)
out = cons()
for (type, rows) ∈ input
add_parts!(out, Symbol(type), length(rows))
end
parse_json_acset(cons, input::AbstractDict) =
parse_json_acset!(cons(), input)
parse_json_acset(cons, input::AbstractString) =
parse_json_acset(cons, JSON.parse(input))
parse_json_acset(acs::ACSet, input::AbstractDict) =
parse_json_acset(constructor(acs), input)

function parse_json_acset!(out::ACSet, input::AbstractDict)
schema = acset_schema(out)
parts = Iterators.map(input) do (type, rows)
Symbol(type) => add_parts!(out, Symbol(type), length(rows))
end |> Dict
for rows ∈ values(input)
for (rownum, row) ∈ enumerate(rows)
for (k, v) ∈ row
for (k, v) ∈ pairs(row)
k = Symbol(k)
if k == :_id
# For now, IDs are assumed to coincide with row number.
@assert rownum == v
continue
end
is_attr = k ∈ attrs(acset_schema(out); just_names=true)
vtype = is_attr ? attr_type(out, k) : Int
v = v isa AbstractDict && haskey(v, "_var") ?
AttrVar(v["_var"]) : vtype(v)
set_subpart!(out, rownum, k, v)
if k ∈ attrs(schema; just_names=true)
vtype = attr_type(out, k)
v = v isa AbstractDict && haskey(v, "_var") ?
AttrVar(v["_var"]) : vtype(v)
end
set_subpart!(out, parts[dom(schema, k)][rownum], k, v)
end
end
end
out
end

function parse_json_acset(target, input::AbstractString)
parse_json_acset(target, JSON.parse(input))
end

""" Deserialize an ACSet object from a JSON file.
Inverse to [`write_json_acset`](@ref).
Expand Down
40 changes: 34 additions & 6 deletions src/serialization/Serialization.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,43 @@
""" Serializing and deserializing acsets to/from different formats.
"""
module ACSetSerialization
export read_acset, read_acset!

using Reexport

include("JSONACSets.jl")
# Interface
###########

@reexport using .JSONACSets
""" Read/deserialize an acset from an external source.
# Extensions
############
Supported source types include:
- `AbstractDict`: assumed to be JSON data
- `XLSX.XLSXFile`: Microsoft Excel file (requires XLSX.jl)
# Arguments
- `cons`: constructor for acset, e.g., the type of a struct acset
- `source`: source to read from
"""
function read_acset(cons, source; kw...)
read_acset!(cons(), source; kw...)
end

function read_xlsx_acset end
""" Mutating variant of [`read_acset`](@ref).
export read_xlsx_acset
# Arguments
- `acset`: acset to write to
- `source`: source to read from
"""
function read_acset! end

# Serializers
#############

include("JSONACSets.jl")
include("ExcelACSets.jl")

@reexport using .JSONACSets
@reexport using .ExcelACSets

end
4 changes: 2 additions & 2 deletions test/serialization/ExcelACSets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ tables = (
)

T = MutagenesisData{Bool,Int,String,Float64}
result = read_xlsx_acset(mutagenesis_path, T, tables=tables)
result = read_xlsx_acset(T, mutagenesis_path, tables=tables)
@test nparts(result, :Molecule) == 188
@test nparts(result, :Atom) == 4893
@test nparts(result, :Bond) == 5243
Expand All @@ -91,7 +91,7 @@ g = @acset LabeledGraph{String} begin
tgt = [2,3,4]
end

result = read_xlsx_acset(labeled_graph_path, LabeledGraph{String}, tables=(
result = read_xlsx_acset(LabeledGraph{String}, labeled_graph_path, tables=(
V = (primary_key = :label,
sheet = 1,
row_range = 4,
Expand Down
2 changes: 1 addition & 1 deletion test/serialization/JSONACSets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ add_parts!(g, :E, 5, src=[1,2,3,4,5], tgt=[2,3,4,5,1])
@test roundtrip_json_acset(g) == g
json = generate_json_acset(g)
@test all(row -> haskey(row, :_id), json[:V])
@test read_acset(Graph, json) == g

SchWeightedGraph = BasicSchema([:V,:E], [(:src,:E,:V),(:tgt,:E,:V)],
[:Weight], [(:weight,:E,:Weight)])
Expand All @@ -41,7 +42,6 @@ add_parts!(g, :V, 3)
add_parts!(g, :E, 2, src=[1,2], tgt=[2,3], weight=[0.5,1.5])
@test roundtrip_json_acset(g) == g


SchLabeledDDS = BasicSchema([:X], [(:Φ,:X,:X)], [:Label], [(:label,:X,:Label)])
@acset_type LabeledDDS(SchLabeledDDS, index=[])

Expand Down

0 comments on commit 1f54cc0

Please sign in to comment.