Merge pull request #70 from AlgebraicJulia/serialization_interface

Generic interface for reading an acset
AlgebraicJulia · Oct 12, 2023 · 1f54cc0 · 1f54cc0
2 parents ba54732 + d9d484d
commit 1f54cc0
Show file tree

Hide file tree

Showing 7 changed files with 119 additions and 75 deletions.
diff --git a/Project.toml b/Project.toml
@@ -24,8 +24,8 @@ XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0"
 nauty_jll = "55c6dc9b-343a-50ca-8ff2-b71adb3733d5"
 
 [extensions]
-ExcelACSets = "XLSX"
 NautyACSetsExt = "nauty_jll"
+XLSXACSetsExt = "XLSX"
 
 [compat]
 AlgebraicInterfaces = "0.1"

diff --git a/ext/ExcelACSets.jl → ext/XLSXACSetsExt.jl b/ext/ExcelACSets.jl → ext/XLSXACSetsExt.jl
@@ -1,55 +1,16 @@
-""" Read acsets from Microsoft Excel files.
-"""
-module ExcelACSets
+module XLSXACSetsExt
 
 import Tables, XLSX
-using ACSets
-
-# Excel spec
-############
-
-const AbstractMap = Union{AbstractDict,NamedTuple}
-
-@kwdef struct ExcelTableSpec
-  sheet::Union{AbstractString,Integer,Missing} = missing
-  primary_key::Union{Symbol,Missing} = missing
-  row_range::Union{AbstractUnitRange,Integer,Missing} = missing
-  column_range::Union{AbstractString,Missing} = missing
-  column_labels::AbstractMap = (;)
-  convert::AbstractMap = (;)
-end
-
-@kwdef struct ExcelSpec
-  tables::AbstractDict{Symbol,ExcelTableSpec} = Dict{Symbol,ExcelTableSpec}()
-end
-
-function ExcelSpec(schema::Schema; tables::AbstractMap=(;), kw...)
-  table_specs = Dict(ob => ExcelTableSpec(; get(tables, ob, (;))...)
-                     for ob in objects(schema))
-  ExcelSpec(; tables=table_specs, kw...)
-end
-
-# Read from spec
-################
 
-""" Read acset from an Excel (.xlsx) file.
+using ACSets
+using ACSets.ACSetSerialization.ExcelACSets: ExcelSpec, ExcelTableSpec
 
-# Arguments
-- `source`: filename or IO stream from which to read Excel file
-- `cons`: constructor for acset, e.g., the acset type for struct acsets
-- `tables=(;)`: dictionary or named tuple mapping object names in acset schema
-  to Excel table specifications
-"""
-function ACSets.read_xlsx_acset(source::Union{AbstractString,IO}, cons; kw...)
-  read_acset(XLSX.readxlsx(source), cons; kw...)
-end
 
-# TODO: Define and export generic functions `read_acset` and `read_acset!`.
-function read_acset(xf::XLSX.XLSXFile, cons; kw...)
-  read_acset!(xf, cons(); kw...)
+function ExcelACSets.read_xlsx(source::Union{AbstractString,IO})
+  XLSX.readxlsx(source)
 end
 
-function read_acset!(xf::XLSX.XLSXFile, acs::ACSet; kw...)
+function ACSets.read_acset!(acs::ACSet, xf::XLSX.XLSXFile; kw...)
   # Read table for each object.
   schema = acset_schema(acs)
   spec = ExcelSpec(schema; kw...)

diff --git a/src/serialization/ExcelACSets.jl b/src/serialization/ExcelACSets.jl
@@ -0,0 +1,84 @@
+""" Read acsets from Microsoft Excel files.
+
+This module holds the table specifications and the `read_xlsx_acset` entry point.
+Loading the Excel file itself is delegated to [`read_xlsx`](@ref), whose working
+method is supplied by a package extension once XLSX.jl is loaded.
+"""
+module ExcelACSets
+export read_xlsx_acset
+
+using ...ACSetInterface, ...Schemas, ..ACSetSerialization
+
+# Data types
+############
+
+const AbstractMap = Union{AbstractDict,NamedTuple}
+
+""" Specification of the Excel table holding the data for one object of a schema.
+
+Every field has a default, so any subset may be given as keyword arguments.
+"""
+@kwdef struct ExcelTableSpec
+  sheet::Union{AbstractString,Integer,Missing} = missing
+  primary_key::Union{Symbol,Missing} = missing
+  row_range::Union{AbstractUnitRange,Integer,Missing} = missing
+  column_range::Union{AbstractString,Missing} = missing
+  column_labels::AbstractMap = (;)
+  convert::AbstractMap = (;)
+end
+
+""" Specification for reading an acset from an Excel file.
+
+Maps each object name to its [`ExcelTableSpec`](@ref).
+"""
+@kwdef struct ExcelSpec
+  tables::AbstractDict{Symbol,ExcelTableSpec} = Dict{Symbol,ExcelTableSpec}()
+end
+
+""" Build an [`ExcelSpec`](@ref) with one table spec per object of `schema`.
+
+Each entry of `tables` supplies the keyword arguments of the
+[`ExcelTableSpec`](@ref) for the object it is keyed by; objects without an entry
+get the default table spec.
+"""
+function ExcelSpec(schema::Schema; tables::AbstractMap=(;), kw...)
+  table_specs = Dict(ob => ExcelTableSpec(; get(tables, ob, (;))...)
+                     for ob in objects(schema))
+  ExcelSpec(; tables=table_specs, kw...)
+end
+
+# Interface
+###########
+
+""" Read acset from an Excel (.xlsx) file.
+
+This is a convenience function that loads the Excel file and then calls
+[`read_acset`](@ref). To use this function, the package XLSX.jl must be
+installed and imported.
+
+# Arguments
+- `cons`: constructor for acset, e.g., the acset type for struct acsets
+- `source`: filename or IO stream from which to read Excel file
+- `tables=(;)`: dictionary or named tuple mapping object names in acset schema
+  to Excel table specifications
+
+All keyword arguments are forwarded to [`read_acset`](@ref).
+"""
+function read_xlsx_acset(cons, source::Union{AbstractString,IO}; kw...)
+  read_acset(cons, read_xlsx(source); kw...)
+end
+
+""" Load an Excel file from a filename or IO stream.
+
+The method that actually reads the file is added when XLSX.jl is loaded. Until
+then, the fallback method below reports what is missing instead of failing with
+a bare `MethodError`.
+"""
+function read_xlsx end
+
+# Fallback: less specific than the extension's method, so it is only reached when
+# the extension is not loaded or the source type is unsupported.
+function read_xlsx(source)
+  error("No method to read an Excel file from a source of type $(typeof(source)). " *
+        "Supported sources are filenames and IO streams, and XLSX.jl must be " *
+        "loaded first (`import XLSX`).")
+end
+
+end
diff --git a/src/serialization/JSONACSets.jl b/src/serialization/JSONACSets.jl
@@ -13,10 +13,13 @@ import Tables
 using ...ACSetInterface, ...Schemas, ...DenseACSets
 using ...DenseACSets: attr_type
 using ...ColumnImplementations: AttrVar # TODO: Move this.
+import ..ACSetSerialization: read_acset!
 
 # ACSet serialization
 #####################
 
+# An `AbstractDict` source is treated as JSON data and read into the acset `acs`.
+read_acset!(acs::ACSet, source::AbstractDict) = parse_json_acset!(acs, source)
+
 """ Generate JSON-able object representing an ACSet.
 
 Inverse to [`parse_json_acset`](@ref).
@@ -40,40 +43,39 @@ attr_to_json(val) = val
 
 Inverse to [`generate_json_acset`](@ref).
 """
-parse_json_acset(::Type{T}, input::AbstractDict) where {T<:StructACSet} =
-  _parse_json_acset(T, input)
-parse_json_acset(d::DynamicACSet, input::AbstractDict) =
-  _parse_json_acset(constructor(d), input)
-
-function _parse_json_acset(cons, input::AbstractDict)
-  out = cons()
-  for (type, rows) ∈ input
-    add_parts!(out, Symbol(type), length(rows))
-  end
+parse_json_acset(cons, input::AbstractDict) =
+  parse_json_acset!(cons(), input)
+parse_json_acset(cons, input::AbstractString) =
+  parse_json_acset(cons, JSON.parse(input))
+parse_json_acset(acs::ACSet, input::AbstractDict) =
+  parse_json_acset(constructor(acs), input)
+
+function parse_json_acset!(out::ACSet, input::AbstractDict)
+  schema = acset_schema(out)
+  parts = Iterators.map(input) do (type, rows)
+    Symbol(type) => add_parts!(out, Symbol(type), length(rows))
+  end |> Dict
   for rows ∈ values(input)
     for (rownum, row) ∈ enumerate(rows)
-      for (k, v) ∈ row
+      for (k, v) ∈ pairs(row)
         k = Symbol(k)
         if k == :_id
           # For now, IDs are assumed to coincide with row number.
           @assert rownum == v
           continue
         end
-        is_attr = k ∈ attrs(acset_schema(out); just_names=true)
-        vtype = is_attr ? attr_type(out, k) : Int
-        v = v isa AbstractDict && haskey(v, "_var") ?
-          AttrVar(v["_var"]) : vtype(v)
-        set_subpart!(out, rownum, k, v)
+        if k ∈ attrs(schema; just_names=true)
+          vtype = attr_type(out, k)
+          v = v isa AbstractDict && haskey(v, "_var") ?
+            AttrVar(v["_var"]) : vtype(v)
+        end
+        set_subpart!(out, parts[dom(schema, k)][rownum], k, v)
       end
     end
   end
   out
 end
 
-function parse_json_acset(target, input::AbstractString)
-  parse_json_acset(target, JSON.parse(input))
-end
-
 """ Deserialize an ACSet object from a JSON file.
 
 Inverse to [`write_json_acset`](@ref).

diff --git a/src/serialization/Serialization.jl b/src/serialization/Serialization.jl
@@ -1,15 +1,43 @@
+""" Serializing and deserializing acsets to/from different formats.
+"""
 module ACSetSerialization
+export read_acset, read_acset!
+
 using Reexport
 
-include("JSONACSets.jl")
+# Interface
+###########
 
-@reexport using .JSONACSets
+""" Read/deserialize an acset from an external source.
 
-# Extensions
-############
+Supported source types include:
+
+- `AbstractDict`: assumed to be JSON data
+- `XLSX.XLSXFile`: Microsoft Excel file (requires XLSX.jl)
+
+# Arguments
+- `cons`: constructor for acset, e.g., the type of a struct acset
+- `source`: source to read from
+"""
+function read_acset(cons, source; kw...)
+  read_acset!(cons(), source; kw...)
+end
 
-function read_xlsx_acset end
+""" Mutating variant of [`read_acset`](@ref).
 
-export read_xlsx_acset
+# Arguments
+- `acset`: acset to write to
+- `source`: source to read from
+"""
+function read_acset! end
+
+# Serializers
+#############
+
+include("JSONACSets.jl")
+include("ExcelACSets.jl")
+
+@reexport using .JSONACSets
+@reexport using .ExcelACSets
 
 end
diff --git a/test/serialization/ExcelACSets.jl b/test/serialization/ExcelACSets.jl
@@ -71,7 +71,7 @@ tables = (
 )
 
 T = MutagenesisData{Bool,Int,String,Float64}
-result = read_xlsx_acset(mutagenesis_path, T, tables=tables)
+result = read_xlsx_acset(T, mutagenesis_path, tables=tables)
 @test nparts(result, :Molecule) == 188
 @test nparts(result, :Atom) == 4893
 @test nparts(result, :Bond) == 5243
@@ -91,7 +91,7 @@ g = @acset LabeledGraph{String} begin
   tgt = [2,3,4]
 end
 
-result = read_xlsx_acset(labeled_graph_path, LabeledGraph{String}, tables=(
+result = read_xlsx_acset(LabeledGraph{String}, labeled_graph_path, tables=(
   V = (primary_key = :label,
        sheet = 1,
        row_range = 4,

diff --git a/test/serialization/JSONACSets.jl b/test/serialization/JSONACSets.jl
@@ -25,6 +25,7 @@ add_parts!(g, :E, 5, src=[1,2,3,4,5], tgt=[2,3,4,5,1])
 @test roundtrip_json_acset(g) == g
 json = generate_json_acset(g)
 @test all(row -> haskey(row, :_id), json[:V])
+@test read_acset(Graph, json) == g
 
 SchWeightedGraph = BasicSchema([:V,:E], [(:src,:E,:V),(:tgt,:E,:V)],
                                [:Weight], [(:weight,:E,:Weight)])
@@ -41,7 +42,6 @@ add_parts!(g, :V, 3)
 add_parts!(g, :E, 2, src=[1,2], tgt=[2,3], weight=[0.5,1.5])
 @test roundtrip_json_acset(g) == g
 
-
 SchLabeledDDS = BasicSchema([:X], [(:Φ,:X,:X)], [:Label], [(:label,:X,:Label)])
 @acset_type LabeledDDS(SchLabeledDDS, index=[:Φ])