diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dc4b251..b8e0020 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,3 +1,5 @@ +name: build + on: - push jobs: diff --git a/README.md b/README.md index 6ac55ee..217f44f 100644 --- a/README.md +++ b/README.md @@ -24,11 +24,11 @@ Languages Following the racket approach to build small languages, we have build some auxiliar languages to ease the task of use/debug the tool. -* `#lang peg`: default language, provides a parse and pretty printing function for the +* `#lang typed-peg`: default language, provides a parse and pretty printing function for the specified PEG, after infering types for the input PEG. -* `#lang peg/untyped`: disable the type-inference engine. Use at your own risk! -* `#lang peg/debug/tokenize-only`: outputs the result of the lexical analyser. -* `#lang peg/debug/parse-only`: outputs the result of the parser. -* `#lang peg/debug/constraints-only`: outputs the constraints generated by the algorithm. -* `#lang peg/debug/z3-script-only`: outputs the z3 script that encode the constraints. -* `#lang peg/debug/infer-only`: outputs the infered types for each grammar non-terminal. +* `#lang typed-peg/untyped`: disables the type-inference engine. Use at your own risk! +* `#lang typed-peg/debug/tokenize-only`: outputs the result of the lexical analyser. +* `#lang typed-peg/debug/parse-only`: outputs the result of the parser. +* `#lang typed-peg/debug/constraints-only`: outputs the constraints generated by the algorithm. +* `#lang typed-peg/debug/z3-script-only`: outputs the z3 script that encodes the constraints. +* `#lang typed-peg/debug/infer-only`: outputs the inferred types for each grammar non-terminal. 
diff --git a/core.rkt b/core.rkt index cf597b7..5fe9621 100644 --- a/core.rkt +++ b/core.rkt @@ -12,6 +12,10 @@ (symb) #:prefab) +(struct pany + () + #:prefab) + (struct pvar (name) #:prefab) diff --git a/examples/test1.rkt b/examples/test1.rkt index 991b636..f356d94 100644 --- a/examples/test1.rkt +++ b/examples/test1.rkt @@ -1,3 +1,3 @@ -#lang typed-peg/untyped +#lang typed-peg/debug/parse-only -start: ('a' 'a')* +start: ('a' 'a')* / ! . diff --git a/examples/test13.rkt b/examples/test13.rkt new file mode 100644 index 0000000..7973bcf --- /dev/null +++ b/examples/test13.rkt @@ -0,0 +1,4 @@ +#lang typed-peg/debug/parse-only + +start: & "a" / "bb" + + diff --git a/grammar.rkt b/grammar.rkt index 500ec20..e6e1195 100644 --- a/grammar.rkt +++ b/grammar.rkt @@ -4,6 +4,16 @@ typed-peg/core typed-peg/lexer) +;; convert a list of characters into a right-nested +;; concatenation of pchr nodes; '() yields (peps) + +(define (string->tree s) + (match s + ['() (peps)] + [(cons c '()) (pchr c)] + [(cons c s1) (pcat (pchr c) + (string->tree s1))])) + (define core-parser (parser (start peg) @@ -23,12 +33,22 @@ [(cat) $1]) (cat [(cat term) (pcat $1 $2)] [(term) $1]) - (term [(NOT term) (pneg $2)] + (term [(prefixop term) ($1 $2)] [(factor) $1]) - (factor [(factor STAR) (pstar $1)] + (prefixop [(NOT) (lambda (e) (pneg e))] + [(AND) (lambda (e) (pneg (pneg e)))]) + (factor [(factor postfix) ($2 $1)] [(atom) $1]) + (postfix [(STAR) (lambda (e) (pstar e))] + [(PLUS) (lambda (e) (pcat e (pstar e)))] + [(OPTION) (lambda (e) (pchoice e (peps)))]) + (char-list [(CHAR) (pchr (car (string->list $1)))] + [(CHAR COMMA char-list) (pchoice (pchr (car (string->list $1))) $3)]) (atom [(EPSILON) (peps)] [(CHAR) (pchr (car (string->list 
$1)))] + [(STRING) (string->tree (string->list $1))] + [(LBRACK char-list RBRACK) $2] + [(ANY) (pany)] [(VAR) (pvar $1)] [(LPAREN expr RPAREN) $2]) ))) diff --git a/info.rkt b/info.rkt index 8ab3948..d4d73b6 100644 --- a/info.rkt +++ b/info.rkt @@ -2,7 +2,9 @@ (define collection "typed-peg") (define deps '("base" "pprint" - "peg-gen")) + "peg-gen" + "rackcheck" + "parser-tools-lib")) (define build-deps '("scribble-lib" "racket-doc" "rackunit-lib")) diff --git a/lexer.rkt b/lexer.rkt index dccadfa..e61b71e 100644 --- a/lexer.rkt +++ b/lexer.rkt @@ -4,17 +4,39 @@ (prefix-in : parser-tools/lex-sre)) (define-tokens value-tokens - (CHAR VAR)) + (CHAR VAR STRING)) (define-empty-tokens op-tokens - (EOF OR LPAREN RPAREN STAR NOT SEMI EPSILON ARROW START)) + (EOF OR + LPAREN + RPAREN + STAR + NOT + SEMI + EPSILON + ARROW + START + ANY + PLUS + OPTION + AND + LBRACK + RBRACK + COMMA)) (define next-token (lexer-src-pos [(eof) (token-EOF)] [(:+ whitespace #\newline) (return-without-pos (next-token input-port))] + ["." (token-ANY)] + ["," (token-COMMA)] + ["[" (token-LBRACK)] + ["]" (token-RBRACK)] ["/" (token-OR)] + ["+" (token-PLUS)] + ["?" (token-OPTION)] ["*" (token-STAR)] + ["&" (token-AND)] ["<--" (token-ARROW)] ["!" 
(token-NOT)] [";" (token-SEMI)] @@ -22,6 +44,10 @@ ["start:" (token-START)] [#\( (token-LPAREN)] [#\) (token-RPAREN)] + [(:seq #\" (complement (:seq any-string #\" any-string)) #\") + (token-STRING (let* ([s lexeme] + [n (string-length s)]) + (substring s 1 (- n 1))))] [(:seq alphabetic (:* (:+ alphabetic numeric))) (token-VAR lexeme)] [(:seq #\' any-char #\') (token-CHAR (let* ([s lexeme] diff --git a/parser.rkt b/parser.rkt index 32f797b..5174039 100644 --- a/parser.rkt +++ b/parser.rkt @@ -27,6 +27,11 @@ (cons (tchr c) s1) '())])) +(define (run-any s) + (match s + ['() '()] + [(cons c s1) (cons (tchr c) s1)])) + (define (run-var g v s) (match (assoc v g) [#f (begin @@ -71,6 +76,7 @@ (match e [(peps) (run-eps s)] [(pchr c) (run-chr c s)] + [(pany) (run-any s)] [(pvar v) (run-var g v s)] [(pcat e1 e2) (run-cat g e1 e2 s)] [(pchoice e1 e2) (run-choice g e1 e2 s)] diff --git a/reader.rkt b/reader.rkt index 7190648..7ae39de 100644 --- a/reader.rkt +++ b/reader.rkt @@ -19,7 +19,7 @@ (define grammar (parse port)) (let ([types (infer grammar)]) (if (eq? (cdr types) 'unsat) - (displayln "The grammar isn't well-typed! It can loop on some inputs.") + (error "The grammar isn't well-typed! It can loop on some inputs.") (datum->syntax #f `(module peg-mod racket diff --git a/scribblings/typed-peg.scrbl b/scribblings/typed-peg.scrbl index c2c09cc..fd17444 100644 --- a/scribblings/typed-peg.scrbl +++ b/scribblings/typed-peg.scrbl @@ -18,7 +18,8 @@ i.e. terminates its execution on all inputs. @section{Requirements} In order to type check, the tool need a working installation of -[Z3 SMT Solver](https://github.com/Z3Prover/z3). The project is known to work with +@hyperlink["https://github.com/Z3Prover/z3"]{Z3 SMT Solver}. +The project is known to work with Z3 version 4.8.14. 
@section{The language typed-peg} diff --git a/typing/constraint-gen.rkt b/typing/constraint-gen.rkt index 410f9df..2e213be 100644 --- a/typing/constraint-gen.rkt +++ b/typing/constraint-gen.rkt @@ -47,6 +47,7 @@ (match e [(peps) (constr-eq ty (type #t '()))] [(pchr c) (constr-eq ty (type #f '()))] + [(pany) (constr-eq ty (type #f '()))] [(pvar v) (constr-eq (pvar v) ty)] [(pchoice e1 e2)