-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.rkt
84 lines (78 loc) · 2.44 KB
/
lexer.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#lang racket
(require parser-tools/lex
(prefix-in : parser-tools/lex-sre))
;; Token group whose members carry a payload: each `token-NAME`
;; constructor generated here takes one value (the lexer in this file
;; always passes a string).
(define-tokens value-tokens
  (CHAR     ; single-quoted character literal
   VAR      ; identifier
   STRING)) ; double-quoted string literal
;; Token group whose members carry no payload. The trailing comment on
;; each name shows the input text that `next-token` turns into it.
(define-empty-tokens op-tokens
  (EOF       ; end of input
   OR        ; "/"
   LPAREN    ; "("
   RPAREN    ; ")"
   STAR      ; "*"
   NOT       ; "!"
   SEMI      ; ";"
   DASH      ; "-"
   TIL       ; "~"
   UP        ; "^"
   EPSILON   ; "epsilon"
   ARROW     ; "<--"
   START     ; "start:"
   GRM       ; "grammar:"
   ANY       ; "."
   PLUS      ; "+"
   OPTION    ; "?"
   AND       ; "&"
   LBRACK    ; "["
   RBRACK    ; "]"
   COMMA     ; ","
   AT))      ; "@"
;; strip-delimiters : string? exact-nonnegative-integer? -> string?
;; Returns `s` without its first `skip` characters and without its final
;; character. Used to peel the quotes (and, for escapes, the backslash)
;; off a matched literal.
(define (strip-delimiters s skip)
  (substring s skip (- (string-length s) 1)))

;; decimal-escape->string : string? -> string?
;; `lit` is a matched literal of the form '\<digits>'. The digits are read
;; as a decimal character code and the result is the one-character string
;; for that code. Raises exn:fail:contract, naming the offending literal,
;; when the code is not a Unicode scalar value (surrogate range or above
;; #x10FFFF) instead of failing inside `integer->char`.
(define (decimal-escape->string lit)
  (define code (string->number (strip-delimiters lit 2)))
  (unless (and (exact-nonnegative-integer? code)
               (or (<= code #xD7FF)
                   (<= #xE000 code #x10FFFF)))
    (raise-arguments-error 'next-token
                           "character escape is not a valid Unicode scalar value"
                           "literal" lit))
  (string (integer->char code)))

;; next-token : input-port? -> position-token
;; Lexer for the grammar notation handled by this file. Built with
;; `lexer-src-pos`, so each result is wrapped with source positions.
;; Whitespace and `%%` line comments are skipped by recursing on the same
;; port; `return-without-pos` is used there so the already-wrapped result
;; of the recursive call is returned unchanged.
;;
;; Rule order is significant: per the parser-tools documentation the
;; longest match wins and ties go to the earliest rule, so the keyword
;; rules must precede the identifier rule and the specific escape rules
;; must precede the generic ones.
(define next-token
  (lexer-src-pos
   [(eof) (token-EOF)]
   ;; NOTE(review): relies on a multi-argument `:+` matching any of its
   ;; operands (alternation) -- confirm against lex-sre.
   [(:+ whitespace #\newline) (return-without-pos (next-token input-port))]

   ;; Punctuation and operators.
   ["." (token-ANY)]
   ["," (token-COMMA)]
   ["[" (token-LBRACK)]
   ["]" (token-RBRACK)]
   ["/" (token-OR)]
   ["+" (token-PLUS)]
   ["?" (token-OPTION)]
   ["*" (token-STAR)]
   ["&" (token-AND)]
   ["<--" (token-ARROW)]
   ["!" (token-NOT)]
   [";" (token-SEMI)]
   ["-" (token-DASH)]
   ["~" (token-TIL)]
   ["^" (token-UP)]
   ["@" (token-AT)]

   ;; Keywords.
   ["epsilon" (token-EPSILON)]
   ["start:" (token-START)]
   ["grammar:" (token-GRM)]

   [#\( (token-LPAREN)]
   [#\) (token-RPAREN)]

   ;; Double-quoted string: everything between the quotes, where the
   ;; contents may not contain a double quote. No escape processing.
   [(:seq #\" (complement (:seq any-string #\" any-string)) #\")
    (token-STRING (strip-delimiters lexeme 1))]

   ;; Identifier: starts with an alphabetic character.
   ;; NOTE(review): same multi-argument `:+` assumption as above.
   [(:seq alphabetic (:* (:+ alphabetic numeric)))
    (token-VAR lexeme)]

   ;; Plain character literal: exactly one character between quotes.
   [(:seq #\' any-char #\') (token-CHAR (strip-delimiters lexeme 1))]

   ;; Named escapes.
   [(:seq #\' #\\ #\n #\') (token-CHAR "\n")]
   [(:seq #\' #\\ #\t #\') (token-CHAR "\t")]
   [(:seq #\' #\\ #\r #\') (token-CHAR "\r")]
   [(:seq #\' #\\ #\b #\') (token-CHAR "\b")]
   [(:seq #\' #\\ #\" #\') (token-CHAR "\"")]
   [(:seq #\' #\\ #\' #\') (token-CHAR "'")]

   ;; Decimal character-code escape. Restricted to ASCII digits: the
   ;; `numeric` class also contains non-ASCII numeric characters that
   ;; `string->number` cannot parse, which previously ended in a
   ;; contract error from `integer->char`. Such input now falls through
   ;; to the generic escape rule below.
   [(:seq #\' #\\ (:+ (:/ #\0 #\9)) #\')
    (token-CHAR (decimal-escape->string lexeme))]

   ;; Any other escaped character stands for itself.
   [(:seq #\' #\\ any-char #\') (token-CHAR (strip-delimiters lexeme 2))]

   ;; Line comment: "%%" up to, but not including, LF (#\012) or
   ;; CR (#\015).
   [(:seq (:seq #\% #\%) (:* (:~ #\012 #\015)))
    (return-without-pos (next-token input-port))]
   ))
(provide value-tokens op-tokens next-token)