Skip to content

Commit

Permalink
Merge pull request #153 from cscaff/ACSetSpec_PEG_Implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
jpfairbanks authored Oct 8, 2024
2 parents 0c5b066 + aaaed94 commit 4ea3ec1
Show file tree
Hide file tree
Showing 6 changed files with 279 additions and 0 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
PEG = "12d937ae-5f68-53be-93c9-3a6f997a20a8"
Permutations = "2ae35dd2-176d-5d53-8349-f30d82d94d4f"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
Expand Down
2 changes: 2 additions & 0 deletions src/ACSets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ include("intertypes/InterTypes.jl")
include("serialization/Serialization.jl")
include("ADTs.jl")
include("NautyInterface.jl")
include("Parsers.jl")

@reexport using .ColumnImplementations: AttrVar
@reexport using .Schemas
Expand All @@ -23,6 +24,7 @@ include("NautyInterface.jl")
@reexport using .InterTypes
@reexport using .ACSetSerialization
using .ADTs
using .Parsers
@reexport using .NautyInterface

end
105 changes: 105 additions & 0 deletions src/Parsers.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
""" Parsers
Parsing ACSetSpecs using PEG.jl. This module lets you build custom grammars
that represent models as strings that aren't tied to any particular programming
language's syntax. Its rules are specifically intended for parsing and
constructing ACSetSpecs.
"""

module Parsers

using Reexport
using ACSets, ACSets.ADTs

@reexport using PEG

# Export macro
export @acsetspec_str

# Export lexing rules
export ws, eq, lparen, rparen, comma, EOL, colon, identifier

# Export parsing rules
export acset_spec, block, line, statement, args, arg

# Basic lexing rules for scanning a string of characters.
# These break the input up into the tokens used by the parsing rules.
# NOTE(review): the `p` suffix on a regex is presumably PEG.jl's flag for also
# consuming trailing whitespace — confirm against the PEG.jl README.

# Any run of whitespace, possibly empty
@rule ws = r"\s*"
# "=" separating a key from its value
@rule eq = r"="p
# Opening and closing parentheses
@rule lparen = r"\("
@rule rparen = r"\)"
# "," separating arguments
@rule comma = r","p
# End of a line: either a newline or ";"
@rule EOL = "\n" , ";"
# ":" — NOTE(review): exported, but not referenced by any rule in this module
@rule colon = r":"p
# One or more characters other than : { } → newline ; = , ( ) and whitespace
@rule identifier = r"[^:{}→\n;=,\(\)\s]+"

# Core Parsing rules for ACSetSpecs
# ACSetSpec Structure:
# acsetspec(head, body)
#
# Example:
#
# acsetspec"""
# LabeledGraph{Symbol}
# begin
# V(label=a)
# V(label=b)
# V(label=c)
# E(src=1,tgt=3)
# E(src=2,tgt=3)
# end
# """
#

# This PEG.jl-based parser is adapted from the recursive descent
# parser in ACSets.jl/ADTs.jl and parses "acsetspec(head, body)"

# acset_spec: optional leading whitespace, a head, the keyword "begin", a block
# of statements and the keyword "end". Builds ACSetSpec(head, block).
@rule acset_spec = ws & head & r"begin"p & block & r"end"p |> v -> ACSetSpec(v[2], v[4])
# head: a run of non-whitespace characters converted to a Symbol; its content is
# not validated. NOTE(review): `\S*` also matches the empty string, so this rule
# on its own does not require a head to be present.
@rule head = r"\S*"p |> v -> Symbol(v)
# block: zero or more lines (`[*]`) followed by an optional newline; yields the
# list of parsed lines
@rule block = line[*] & r"\n?"p |> v -> v[1]
# line: optional leading whitespace, a statement, optional trailing whitespace
# other than CR/LF, then a new line or ";"; yields the statement
@rule line = ws & statement & r"[^\S\r\n]*" & EOL |> v -> v[2]
# statement: a call followed by arguments in parentheses, "identifier(args)";
# yields Statement(Symbol(identifier), args)
@rule statement = identifier & lparen & ws & args & ws & rparen |> v -> Statement(Symbol(v[1]), v[4])
# args: any number of "arg ," groups followed by a final arg, i.e. at least one
# argument, comma separated; the raw match is flattened by collect_args
@rule args = (arg & ws & comma)[*] & arg |> v -> collect_args(v)
# arg: one of three alternatives, listed in this order:
#   1. a parenthesised list of further arguments -> the inner args list
#   2. a key-value pair "identifier = arg"       -> parse_assignment
#   3. a single identifier                        -> parse_identifier
@rule arg = ((lparen & args & rparen) |> v -> v[2]),
((identifier & eq & arg) |> v -> parse_assignment(v)),
(identifier |> v -> parse_identifier(v))

# Flattens the raw match of the `args` rule into a single argument list.
# `v[1]` holds the repeated leading groups, each of which starts with a parsed
# argument; the last element of `v` is the final argument.
function collect_args(v::Vector{Any})
  leading = v[1]
  collected = Args[first(group) for group in leading]
  return push!(collected, last(v))
end

# Turns an identifier token into a Value: the text is wrapped as an Int when
# `tryparse(Int, v)` succeeds and as a Symbol otherwise.
function parse_identifier(v)
  number = tryparse(Int, v)
  return Value(number === nothing ? Symbol(v) : number)
end

# Builds a Kwarg from a matched "identifier = arg" sequence.
# `v[1]` is the key text and `v[3]` the already-parsed right-hand side.
# A Vector right-hand side is wrapped in a Value; anything else is passed
# through unchanged so that a single Value is not wrapped twice.
function parse_assignment(v)
  key = Symbol(v[1])
  rhs = v[3]
  wrapped = rhs isa Vector ? Value(rhs) : rhs
  return Kwarg(key, wrapped)
end

# String macro acsetspec"...": parses the literal with the `acset_spec` rule
# via `parse_whole` when the macro is expanded and yields the parsed result.
macro acsetspec_str(x::String)
  return parse_whole(acset_spec, x)
end

end
1 change: 1 addition & 0 deletions test/ADTs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,4 @@ end
end

end

166 changes: 166 additions & 0 deletions test/Parsers.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
""" ParserTests
This module contains the tests that check the Parsers module works as intended.
Specifically, it provides unit tests for each PEG.jl rule function, unit tests for
the error handling of each rule, and an end-to-end test of overall functionality.
"""

module ParserTests

using Test
using ACSets, ACSets.ADTs
using ACSets.Parsers

# Overloads "==" so two Statement structs are equal when both their `table`
# and `element` fields are equal
Base.:(==)(s1::ACSets.ADTs.Statement, s2::ACSets.ADTs.Statement) =
  s1.table == s2.table && s1.element == s2.element

# Overloads "==" so two Kwargs are equal when both of their fields
# (`_1` and `_2`) are equal
Base.:(==)(s1::ACSets.ADTs.Kwarg, s2::ACSets.ADTs.Kwarg) =
  s1._1 == s2._1 && s1._2 == s2._2

# Overloads "==" so two Values are equal when their wrapped `_1` fields are equal
Base.:(==)(s1::ACSets.ADTs.Value, s2::ACSets.ADTs.Value) = s1._1 == s2._1

# Taken from "PEG.jl/blob/master/test/misc.jl" to test parsing exception handling.
#
# Runs `parse_whole(rule, input)` and returns the byte offset (an Int) reported
# in the message of the resulting `Meta.ParseError`.
# Returns the string "parse succeeded!" when no error is raised.
# Any exception that is not a `Meta.ParseError`, and any ParseError whose message
# does not start with "On line <n>, at column <n> (byte <n>):", is rethrown.
function parse_fails_at(rule, input)
  try
    parse_whole(rule, input)
    "parse succeeded!"
  catch err
    isa(err, Meta.ParseError) || rethrow()
    m = match(r"^On line \d+, at column \d+ \(byte (\d+)\):", err.msg)
    # `match` returns `nothing` on no match; test by identity rather than `==`
    isnothing(m) && rethrow()
    parse(Int, m.captures[1])
  end
end

# -------------- Test Groups per rule: -------------- #
@testset "arg_test" begin
  # a bare integer becomes a positional Value
  @test first(arg("1")) == Value(1)
  # key=value becomes a Kwarg
  @test first(arg("test=2")) == Kwarg(:test, Value(2))
  # a parenthesised list becomes a vector of Values
  @test first(arg("(1,2,3)")) == [Value(1), Value(2), Value(3)]
end

@testset "args_test" begin
  # positional values, with and without whitespace around the commas
  @test first(args("a, b, c")) == [Value(:a), Value(:b), Value(:c)]
  @test first(args("1,2,3")) == [Value(1), Value(2), Value(3)]
  @test first(args("1 , 1")) == [Value(1), Value(1)]
  # keyword arguments, alone and mixed with a positional value
  @test first(args("a=1, b=2, c=3")) ==
    [Kwarg(:a, Value(1)), Kwarg(:b, Value(2)), Kwarg(:c, Value(3))]
  @test first(args("1, b=2, c=3")) ==
    [Value(1), Kwarg(:b, Value(2)), Kwarg(:c, Value(3))]
  @test first(args("a=1, b=2, c=3, d=4")) ==
    [Kwarg(:a, Value(1)), Kwarg(:b, Value(2)), Kwarg(:c, Value(3)), Kwarg(:d, Value(4))]
  # a tuple-valued keyword argument is wrapped in a Value
  @test first(args("a=1, b=(1,1), c=2")) ==
    [Kwarg(:a, Value(1)), Kwarg(:b, Value([Value(1), Value(1)])), Kwarg(:c, Value(2))]
  @test first(args("foo, b ,c")) == [Value(:foo), Value(:b), Value(:c)]
end

@testset "statement_test" begin
  # positional arguments, with and without padding around them
  @test first(statement("test(a)")) == Statement(:test, [Value(:a)])
  @test first(statement("test(a, b)")) == Statement(:test, [Value(:a), Value(:b)])
  @test first(statement("test( foo , bar , buz )")) ==
    Statement(:test, [Value(:foo), Value(:bar), Value(:buz)])
  # keyword arguments, including tuple-valued ones
  @test first(statement("E(src=1,tgt=3)")) ==
    Statement(:E, [Kwarg(:src, Value(1)), Kwarg(:tgt, Value(3))])
  @test first(statement("A(src=(0,0), length=(1,1))")) ==
    Statement(:A, [
      Kwarg(:src, Value([Value(0), Value(0)])),
      Kwarg(:length, Value([Value(1), Value(1)])),
    ])
  @test first(statement("A(label=a, src=(0,0))")) ==
    Statement(:A, [Kwarg(:label, Value(:a)), Kwarg(:src, Value([Value(0), Value(0)]))])
end

@testset "line_test" begin
  # a line may end in a newline or ";", and may be padded with spaces
  expected = Statement(:test, [Value(:a)])
  @test first(line("test(a)\n")) == expected
  @test first(line("test(a);")) == expected
  @test first(line(" test(a) \n")) == expected
end

@testset "block_test" begin
  # three newline-terminated lines yield three statements in order
  parsed = first(block("test(a)\n test(b)\n test(c)\n"))
  @test parsed == [
    Statement(:test, [Value(:a)]),
    Statement(:test, [Value(:b)]),
    Statement(:test, [Value(:c)]),
  ]
end

# -------------- Test Error Handling -------------- #

# Each case feeds `arg` a malformed input; the expected number is the byte offset
# that parse_fails_at extracts from the ParseError message.
@testset "arg_handling" begin
@test parse_fails_at(arg, "(1") == 3 #Missing ")" at index 3
@test parse_fails_at(arg, "a=") == 3 #Missing value after "=" at index 3
@test parse_fails_at(arg,"invalid→ident") == 8 #Invalid character "→" at byte 8
end

@testset "args_handling" begin
  # malformed input => byte offset at which parsing is expected to fail
  failures = [
    "1," => 3,   # missing value after ","
    ", 3" => 1,  # missing initial value before ","
    "1 1" => 3,  # missing "," between the values
  ]
  for (input, offset) in failures
    @test parse_fails_at(args, input) == offset
  end
end

@testset "statement_handling" begin
  # malformed input => byte offset at which parsing is expected to fail
  failures = [
    "test" => 5,    # missing "("
    "test()" => 6,  # missing argument
    "test(1" => 7,  # missing ")"
  ]
  for (input, offset) in failures
    @test parse_fails_at(statement, input) == offset
  end
end

@testset "line_handling" begin
  # malformed input => byte offset at which parsing is expected to fail
  failures = [
    "test(a)" => 8,          # missing EOL
    "test(a) test(b)" => 9,  # missing EOL between the two statements
    ";" => 1,                # missing statement
  ]
  for (input, offset) in failures
    @test parse_fails_at(line, input) == offset
  end
end

@testset "block_handling" begin
  # two statements on one line with no EOL between them: failure at byte 9
  unterminated = "test(a) test(b)"
  @test parse_fails_at(block, unterminated) == 9
end

@testset "acset_spec_handling" begin
  # The head is missing at index 1. However, the parser keeps building a head
  # until it reaches white space at index 6, so "begin" itself is registered as
  # the head. Parsing then fails because the "begin" keyword is missing: it has
  # already been consumed as the head.
  # This could be modified for better parsing errors, but is kept as is for now
  # for the simplicity of the grammar.
  headless = "begin\ntest(a)\ntest(b)\nend"
  @test parse_fails_at(acset_spec, headless) == 7
end

# ------------ Full Scale Tests ----------- #

# Schema used by the end-to-end test below.
# NOTE(review): the four arguments are presumably objects, homs, attribute types
# and attributes, in that order — confirm against BasicSchema in ACSets.Schemas.
SchLabeledGraph = BasicSchema(
  [:E, :V],
  [(:src, :E, :V), (:tgt, :E, :V)],
  [:L],
  [(:label, :V, :L)]
)

# Acset type generated from the schema, with `src` and `tgt` passed as `index`
@acset_type LabeledGraph(SchLabeledGraph, index=[:src, :tgt])

# End-to-end check: a spec written out with the ADT constructors and a spec
# produced by the acsetspec"..." string macro must construct equal
# LabeledGraph{Symbol} acsets.
@testset "Constructing ACSets from Specs" begin
# Reference spec built directly from the ADT constructors
gspec = ACSetSpec(
:(LabeledGraph{Symbol}),
[
Statement(:V, [Kwarg(:label, Value(:a))])
Statement(:V, [Kwarg(:label, Value(:b))])
Statement(:V, [Kwarg(:label, Value(:c))])
Statement(:E, [Kwarg(:src, Value(1)), Kwarg(:tgt, Value(3))])
Statement(:E, [Kwarg(:src, Value(2)), Kwarg(:tgt, Value(3))])
]
)
g = construct(LabeledGraph{Symbol}, gspec)
@test nparts(g, :V) == 3
@test nparts(g, :E) == 2

# The same graph described as text and parsed by the string macro
hspec = acsetspec"""
LabeledGraph{Symbol}
begin
V(label=a)
V(label=b)
V(label=c)
E(src=1,tgt=3)
E(src=2,tgt=3)
end"""

@test construct(LabeledGraph{Symbol}, hspec) == construct(LabeledGraph{Symbol}, gspec)

#Construct cannot be tested without CombinatorialSpaces.jl
# so `cspec` is never constructed or asserted on; this only exercises the
# string macro on tuple-valued keyword arguments and positional arguments.
cspec = acsetspec"""
SemiSimplicialSet
begin
V(label=a, pos=(0,0))
V(label=b, pos=(1,0))
V(label=c, pos=(0,1))
E(1,2)
E(2,3)
E(3,1)
T(1,2,3)
end"""
end

end
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ end
include("ACSets.jl")
end

# Tests for the PEG-based ACSetSpec parser, defined in test/Parsers.jl
@testset "Parsers" begin
include("Parsers.jl")
end

@testset "Serialization" begin
include("serialization/Serialization.jl")
end
Expand Down

0 comments on commit 4ea3ec1

Please sign in to comment.