From 66d6061b231f3aaffbaab953919f0b1ff9e07303 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elton=20M=C3=A1ximo=20Cardoso?= Date: Fri, 18 Feb 2022 17:52:58 -0300 Subject: [PATCH 01/10] Rename badge name --- .github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dc4b251..b8e0020 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,3 +1,5 @@ +name: build + on: - push jobs: From 58cdcf1225c606b31c6b26ebf059abae5fa61c8c Mon Sep 17 00:00:00 2001 From: Rodrigo Ribeiro Date: Sat, 19 Feb 2022 10:25:23 -0300 Subject: [PATCH 02/10] Trying to fix dependency problems --- info.rkt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/info.rkt b/info.rkt index 8ab3948..72f5012 100644 --- a/info.rkt +++ b/info.rkt @@ -2,7 +2,8 @@ (define collection "typed-peg") (define deps '("base" "pprint" - "peg-gen")) + "peg-gen" + "parser-tools-lib")) (define build-deps '("scribble-lib" "racket-doc" "rackunit-lib")) From e5a525803998e25d820e0e6f3f0b45ba49b5ab3d Mon Sep 17 00:00:00 2001 From: Rodrigo Ribeiro Date: Sat, 19 Feb 2022 13:37:32 -0300 Subject: [PATCH 03/10] Fixing dependencies --- info.rkt | 1 + 1 file changed, 1 insertion(+) diff --git a/info.rkt b/info.rkt index 72f5012..d4d73b6 100644 --- a/info.rkt +++ b/info.rkt @@ -3,6 +3,7 @@ (define deps '("base" "pprint" "peg-gen" + "rackcheck" "parser-tools-lib")) (define build-deps '("scribble-lib" "racket-doc" From 6bec91624e21d3d54bf2becdcd2080318bfb6de5 Mon Sep 17 00:00:00 2001 From: Rodrigo Ribeiro Date: Sat, 19 Feb 2022 13:46:45 -0300 Subject: [PATCH 04/10] Fixing links in documentations --- scribblings/typed-peg.scrbl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scribblings/typed-peg.scrbl b/scribblings/typed-peg.scrbl index c2c09cc..fd17444 100644 --- a/scribblings/typed-peg.scrbl +++ b/scribblings/typed-peg.scrbl @@ -18,7 +18,8 @@ i.e. terminates its execution on all inputs. 
@section{Requirements} In order to type check, the tool need a working installation of -[Z3 SMT Solver](https://github.com/Z3Prover/z3). The project is known to work with +@hyperlink["https://github.com/Z3Prover/z3"]{Z3 SMT Solver}. +The project is known to work with Z3 version 4.8.14. @section{The language typed-peg} From dc188306957cf98293aa1431afd9b57c28e4a80f Mon Sep 17 00:00:00 2001 From: Rodrigo Ribeiro Date: Fri, 1 Apr 2022 14:27:55 -0300 Subject: [PATCH 05/10] Adding . expression --- core.rkt | 4 ++++ examples/test1.rkt | 4 ++-- examples/test4.rkt | 2 +- examples/test7.rkt | 2 +- grammar.rkt | 1 + lexer.rkt | 3 ++- parser.rkt | 6 ++++++ reader.rkt | 2 +- typing/constraint-gen.rkt | 1 + 9 files changed, 19 insertions(+), 6 deletions(-) diff --git a/core.rkt b/core.rkt index cf597b7..5fe9621 100644 --- a/core.rkt +++ b/core.rkt @@ -12,6 +12,10 @@ (symb) #:prefab) +(struct pany + () + #:prefab) + (struct pvar (name) #:prefab) diff --git a/examples/test1.rkt b/examples/test1.rkt index 991b636..544a32f 100644 --- a/examples/test1.rkt +++ b/examples/test1.rkt @@ -1,3 +1,3 @@ -#lang typed-peg/untyped +#lang typed-peg -start: ('a' 'a')* +start: ('a' 'a')* / ! . diff --git a/examples/test4.rkt b/examples/test4.rkt index 46f1363..982c547 100644 --- a/examples/test4.rkt +++ b/examples/test4.rkt @@ -1,4 +1,4 @@ -#lang typed-peg/untyped +#lang typed-peg/debug/infer-only S <-- !! A 'a'* B ! 
C ; B <-- 'b' B 'c' / epsilon ; diff --git a/examples/test7.rkt b/examples/test7.rkt index 19ea8b3..8c2b6d5 100644 --- a/examples/test7.rkt +++ b/examples/test7.rkt @@ -1,4 +1,4 @@ -#lang typed-peg/untyped +#lang typed-peg start: (epsilon)* diff --git a/grammar.rkt b/grammar.rkt index 500ec20..3556797 100644 --- a/grammar.rkt +++ b/grammar.rkt @@ -29,6 +29,7 @@ [(atom) $1]) (atom [(EPSILON) (peps)] [(CHAR) (pchr (car (string->list $1)))] + [(ANY) (pany)] [(VAR) (pvar $1)] [(LPAREN expr RPAREN) $2]) ))) diff --git a/lexer.rkt b/lexer.rkt index dccadfa..1cc87ad 100644 --- a/lexer.rkt +++ b/lexer.rkt @@ -7,12 +7,13 @@ (CHAR VAR)) (define-empty-tokens op-tokens - (EOF OR LPAREN RPAREN STAR NOT SEMI EPSILON ARROW START)) + (EOF OR LPAREN RPAREN STAR NOT SEMI EPSILON ARROW START ANY)) (define next-token (lexer-src-pos [(eof) (token-EOF)] [(:+ whitespace #\newline) (return-without-pos (next-token input-port))] + ["." (token-ANY)] ["/" (token-OR)] ["*" (token-STAR)] ["<--" (token-ARROW)] diff --git a/parser.rkt b/parser.rkt index 32f797b..5174039 100644 --- a/parser.rkt +++ b/parser.rkt @@ -27,6 +27,11 @@ (cons (tchr c) s1) '())])) +(define (run-any s) + (match s + ['() '()] + [(cons c s1) (cons (tchr c) s1)])) + (define (run-var g v s) (match (assoc v g) [#f (begin @@ -71,6 +76,7 @@ (match e [(peps) (run-eps s)] [(pchr c) (run-chr c s)] + [(pany) (run-any s)] [(pvar v) (run-var g v s)] [(pcat e1 e2) (run-cat g e1 e2 s)] [(pchoice e1 e2) (run-choice g e1 e2 s)] diff --git a/reader.rkt b/reader.rkt index 7190648..7ae39de 100644 --- a/reader.rkt +++ b/reader.rkt @@ -19,7 +19,7 @@ (define grammar (parse port)) (let ([types (infer grammar)]) (if (eq? (cdr types) 'unsat) - (displayln "The grammar isn't well-typed! It can loop on some inputs.") + (error "The grammar isn't well-typed! 
It can loop on some inputs.") (datum->syntax #f `(module peg-mod racket diff --git a/typing/constraint-gen.rkt b/typing/constraint-gen.rkt index 410f9df..2e213be 100644 --- a/typing/constraint-gen.rkt +++ b/typing/constraint-gen.rkt @@ -47,6 +47,7 @@ (match e [(peps) (constr-eq ty (type #t '()))] [(pchr c) (constr-eq ty (type #f '()))] + [(pany) (constr-eq ty (type #f '()))] [(pvar v) (constr-eq (pvar v) ty)] [(pchoice e1 e2) From 8d707a6a2a0691dd4ee080976da7ce15e98d2d57 Mon Sep 17 00:00:00 2001 From: Rodrigo Ribeiro Date: Fri, 1 Apr 2022 15:10:45 -0300 Subject: [PATCH 06/10] Fixes on tests --- examples/test1.rkt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/test1.rkt b/examples/test1.rkt index 544a32f..be8af6f 100644 --- a/examples/test1.rkt +++ b/examples/test1.rkt @@ -1,3 +1,3 @@ -#lang typed-peg +#lang typed-peg/untyped start: ('a' 'a')* / ! . From d01ad2d4b7df082979829c2f17df8bcb76c5b0f6 Mon Sep 17 00:00:00 2001 From: Rodrigo Ribeiro Date: Fri, 1 Apr 2022 15:13:19 -0300 Subject: [PATCH 07/10] Fixes on tests --- examples/test4.rkt | 2 +- examples/test7.rkt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/test4.rkt b/examples/test4.rkt index 982c547..46f1363 100644 --- a/examples/test4.rkt +++ b/examples/test4.rkt @@ -1,4 +1,4 @@ -#lang typed-peg/debug/infer-only +#lang typed-peg/untyped S <-- !! A 'a'* B ! C ; B <-- 'b' B 'c' / epsilon ; diff --git a/examples/test7.rkt b/examples/test7.rkt index 8c2b6d5..19ea8b3 100644 --- a/examples/test7.rkt +++ b/examples/test7.rkt @@ -1,4 +1,4 @@ -#lang typed-peg +#lang typed-peg/untyped start: (epsilon)* From 4a3709942af301640b41df4ee5939a7c58e2110b Mon Sep 17 00:00:00 2001 From: Rodrigo Ribeiro Date: Sat, 2 Apr 2022 13:49:59 -0300 Subject: [PATCH 08/10] Adding support to strings, which are desugared to concatenation of characters. 
--- README.md | 14 +++++++------- examples/test12.rkt | 3 +++ grammar.rkt | 10 ++++++++++ lexer.rkt | 11 +++++++---- 4 files changed, 27 insertions(+), 11 deletions(-) create mode 100644 examples/test12.rkt diff --git a/README.md b/README.md index 6ac55ee..217f44f 100644 --- a/README.md +++ b/README.md @@ -24,11 +24,11 @@ Languages Following the racket approach to build small languages, we have build some auxiliar languages to ease the task of use/debug the tool. -* `#lang peg`: default language, provides a parse and pretty printing function for the +* `#lang typed-peg`: default language, provides a parse and pretty printing function for the specified PEG, after infering types for the input PEG. -* `#lang peg/untyped`: disable the type-inference engine. Use at your own risk! -* `#lang peg/debug/tokenize-only`: outputs the result of the lexical analyser. -* `#lang peg/debug/parse-only`: outputs the result of the parser. -* `#lang peg/debug/constraints-only`: outputs the constraints generated by the algorithm. -* `#lang peg/debug/z3-script-only`: outputs the z3 script that encode the constraints. -* `#lang peg/debug/infer-only`: outputs the infered types for each grammar non-terminal. +* `#lang typed-peg/untyped`: disable the type-inference engine. Use at your own risk! +* `#lang typed-peg/debug/tokenize-only`: outputs the result of the lexical analyser. +* `#lang typed-peg/debug/parse-only`: outputs the result of the parser. +* `#lang typed-peg/debug/constraints-only`: outputs the constraints generated by the algorithm. +* `#lang typed-peg/debug/z3-script-only`: outputs the z3 script that encodes the constraints. +* `#lang typed-peg/debug/infer-only`: outputs the inferred types for each grammar non-terminal. 
diff --git a/examples/test12.rkt b/examples/test12.rkt new file mode 100644 index 0000000..6684fc0 --- /dev/null +++ b/examples/test12.rkt @@ -0,0 +1,3 @@ +#lang typed-peg/debug/parse-only + +start: "aab" diff --git a/grammar.rkt b/grammar.rkt index 3556797..4da223e 100644 --- a/grammar.rkt +++ b/grammar.rkt @@ -4,6 +4,15 @@ typed-peg/core typed-peg/lexer) +;; converting a string token into a tree of +;; characters concatenation + +(define (string->tree s) + (match s + ['() (peps)] + [(cons c s1) (pcat (pchr c) + (string->tree s1))])) + (define core-parser (parser (start peg) @@ -29,6 +38,7 @@ [(atom) $1]) (atom [(EPSILON) (peps)] [(CHAR) (pchr (car (string->list $1)))] + [(STRING) (string->tree (string->list $1))] [(ANY) (pany)] [(VAR) (pvar $1)] [(LPAREN expr RPAREN) $2]) diff --git a/lexer.rkt b/lexer.rkt index 1cc87ad..ea2b7cb 100644 --- a/lexer.rkt +++ b/lexer.rkt @@ -4,7 +4,7 @@ (prefix-in : parser-tools/lex-sre)) (define-tokens value-tokens - (CHAR VAR)) + (CHAR VAR STRING)) (define-empty-tokens op-tokens (EOF OR LPAREN RPAREN STAR NOT SEMI EPSILON ARROW START ANY)) @@ -23,11 +23,14 @@ ["start:" (token-START)] [#\( (token-LPAREN)] [#\) (token-RPAREN)] + [(:seq #\" (complement (:seq any-string #\" any-string)) #\") + (token-STRING (let* ([s lexeme] + [n (string-length s)]) + (substring s 1 (- n 1))))] [(:seq alphabetic (:* (:+ alphabetic numeric))) (token-VAR lexeme)] - [(:seq #\' any-char #\') (token-CHAR (let* ([s lexeme] - [n (string-length s)]) - (substring s 1 (- n 1))))])) + [(:seq #\' any-char #\') (token-CHAR (let* ([s lexeme]) + (substring s 1 1)))])) (provide value-tokens op-tokens next-token) From 9c6548515ecc31a0a200d86e74d3b3c715468ebb Mon Sep 17 00:00:00 2001 From: Rodrigo Ribeiro Date: Sat, 2 Apr 2022 13:58:56 -0300 Subject: [PATCH 09/10] Fixes on lexer --- examples/test1.rkt | 2 +- lexer.rkt | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/test1.rkt b/examples/test1.rkt index be8af6f..f356d94 100644 --- 
a/examples/test1.rkt +++ b/examples/test1.rkt @@ -1,3 +1,3 @@ -#lang typed-peg/untyped +#lang typed-peg/debug/parse-only start: ('a' 'a')* / ! . diff --git a/lexer.rkt b/lexer.rkt index ea2b7cb..2539019 100644 --- a/lexer.rkt +++ b/lexer.rkt @@ -29,8 +29,9 @@ (substring s 1 (- n 1))))] [(:seq alphabetic (:* (:+ alphabetic numeric))) (token-VAR lexeme)] - [(:seq #\' any-char #\') (token-CHAR (let* ([s lexeme]) - (substring s 1 1)))])) + [(:seq #\' any-char #\') (token-CHAR (let* ([s lexeme] + [n (string-length s)]) + (substring s 1 (- n 1))))])) (provide value-tokens op-tokens next-token) From 2a1002fa94cefb4416f9a3c29f5af1f2fd239d36 Mon Sep 17 00:00:00 2001 From: Rodrigo Ribeiro Date: Mon, 11 Apr 2022 19:59:34 -0300 Subject: [PATCH 10/10] More PEG operators --- examples/test13.rkt | 4 ++++ grammar.rkt | 13 +++++++++++-- lexer.rkt | 23 ++++++++++++++++++++++- 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 examples/test13.rkt diff --git a/examples/test13.rkt b/examples/test13.rkt new file mode 100644 index 0000000..7973bcf --- /dev/null +++ b/examples/test13.rkt @@ -0,0 +1,4 @@ +#lang typed-peg/debug/parse-only + +start: & "a" / "bb" + + diff --git a/grammar.rkt b/grammar.rkt index 4da223e..e6e1195 100644 --- a/grammar.rkt +++ b/grammar.rkt @@ -10,6 +10,7 @@ (define (string->tree s) (match s ['() (peps)] + [(cons c '()) (pchr c)] [(cons c s1) (pcat (pchr c) (string->tree s1))])) @@ -32,13 +33,21 @@ [(cat) $1]) (cat [(cat term) (pcat $1 $2)] [(term) $1]) - (term [(NOT term) (pneg $2)] + (term [(prefixop term) ($1 $2)] [(factor) $1]) - (factor [(factor STAR) (pstar $1)] + (prefixop [(NOT) (lambda (e) (pneg e))] + [(AND) (lambda (e) (pneg (pneg e)))]) + (factor [(factor postfix) ($2 $1)] [(atom) $1]) + (postfix [(STAR) (lambda (e) (pstar e))] + [(PLUS) (lambda (e) (pcat e (pstar e)))] + [(OPTION) (lambda (e) (pchoice e (peps)))]) + (char-list [(CHAR) (pchr (car (string->list $1)))] + [(CHAR COMMA char-list) (pchoice (pchr (car (string->list $1))) $3)]) (atom [(EPSILON) (peps)] 
[(CHAR) (pchr (car (string->list $1)))] [(STRING) (string->tree (string->list $1))] + [(LBRACK char-list RBRACK) $2] [(ANY) (pany)] [(VAR) (pvar $1)] [(LPAREN expr RPAREN) $2]) diff --git a/lexer.rkt b/lexer.rkt index 2539019..e61b71e 100644 --- a/lexer.rkt +++ b/lexer.rkt @@ -7,15 +7,36 @@ (CHAR VAR STRING)) (define-empty-tokens op-tokens - (EOF OR LPAREN RPAREN STAR NOT SEMI EPSILON ARROW START ANY)) + (EOF OR + LPAREN + RPAREN + STAR + NOT + SEMI + EPSILON + ARROW + START + ANY + PLUS + OPTION + AND + LBRACK + RBRACK + COMMA)) (define next-token (lexer-src-pos [(eof) (token-EOF)] [(:+ whitespace #\newline) (return-without-pos (next-token input-port))] ["." (token-ANY)] + ["," (token-COMMA)] + ["[" (token-LBRACK)] + ["]" (token-RBRACK)] ["/" (token-OR)] + ["+" (token-PLUS)] + ["?" (token-OPTION)] ["*" (token-STAR)] + ["&" (token-AND)] ["<--" (token-ARROW)] ["!" (token-NOT)] [";" (token-SEMI)]