package glambda

import (
	"fmt"
	"regexp"
)

// tokenType identifies the kind of a lexical token.
type tokenType string

const (
	tokenErr        tokenType = `error`
	tokenLambda     tokenType = `\`
	tokenDot        tokenType = `.`
	tokenLeftParen  tokenType = `(`
	tokenRightParen tokenType = `)`
	tokenEquals     tokenType = `=`
	tokenComment    tokenType = `comment`
	tokenIdentifier tokenType = `identifier`
	tokenNewLine    tokenType = `newline`
	tokenWhitespace tokenType = `whitespace`
	tokenEOF        tokenType = `EOF`
)

// token is a single lexical token: its type and the raw text it matched.
type token struct {
	tokenType tokenType
	value     string
}

func (t token) String() string {
	switch {
	case t.tokenType == tokenEOF:
		return "EOF"
	case t.tokenType == tokenErr:
		return t.value
	case len(t.value) > 10:
		// Truncate long values for readability.
		return fmt.Sprintf("%.10s...", t.value)
	}
	return t.value
}
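
// For example (illustrative):
//
//	token{tokenIdentifier, "abcdefghijklm"}.String() // "abcdefghij..."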

// lexer scans an input string and delivers tokens over a channel.
type lexer struct {
	input    string
	position int
	tokens   chan token
}

// lex creates a lexer for input and starts scanning in a separate goroutine.
// Tokens are received via nextItem.
func lex(input string) *lexer {
	l := &lexer{
		input:  input,
		tokens: make(chan token),
	}
	go l.run()
	return l
}

func (l *lexer) emit(t token) {
	l.tokens <- t
}

// nextItem returns the next token from the input, blocking until one is ready.
func (l *lexer) nextItem() token {
	return <-l.tokens
}

// errorf emits an error token whose value is the formatted message.
func (l *lexer) errorf(format string, args ...interface{}) {
	value := fmt.Sprintf(format, args...)
	l.emit(token{tokenErr, value})
}

// tokenRegexes pairs token types with patterns, each anchored (\A) to the
// start of the remaining input. Order matters: patterns are tried first to
// last, so comments win over identifiers and newlines over other whitespace.
var tokenRegexes = []struct {
	tokenType tokenType
	regex     *regexp.Regexp
}{
	{tokenLambda, regexp.MustCompile(`\A([\\λ])`)},
	{tokenDot, regexp.MustCompile(`\A(\.)`)},
	{tokenLeftParen, regexp.MustCompile(`\A(\()`)},
	{tokenRightParen, regexp.MustCompile(`\A(\))`)},
	{tokenEquals, regexp.MustCompile(`\A(=)`)},
	{tokenComment, regexp.MustCompile(`\A(--.*)`)},
	{tokenIdentifier, regexp.MustCompile(`\A(\b[a-zA-Z0-9]+\b)`)},
	{tokenNewLine, regexp.MustCompile(`\A(\n+)`)},
	// Anchored like the rest, so whitespace is only consumed when it sits at
	// the current position rather than anywhere in the remaining input.
	{tokenWhitespace, regexp.MustCompile(`\A(\s)`)},
}
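
// For example (illustrative): lexing the input `\x.x` yields, in order,
// tokenLambda "\", tokenIdentifier "x", tokenDot ".", tokenIdentifier "x",
// and finally tokenEOF.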

// lexOneToken matches the input at the current position against each pattern
// in turn, emits the first match, and advances the position past it.
func (l *lexer) lexOneToken() {
	input := l.input[l.position:]
	for _, tr := range tokenRegexes {
		if value := tr.regex.FindString(input); value != "" {
			// Whitespace tokens are not emitted, but the position
			// still advances past them.
			if tr.tokenType != tokenWhitespace {
				l.emit(token{tr.tokenType, value})
			}
			l.position += len(value)
			return
		}
	}
	l.errorf("invalid token '%s'", input)
	// Skip to the end of the input so run terminates after an error instead
	// of emitting the same error token forever.
	l.position = len(l.input)
}

// run lexes the input until it is exhausted, then emits EOF and closes the
// token channel.
func (l *lexer) run() {
	for l.position < len(l.input) {
		l.lexOneToken()
	}
	l.emit(token{tokenEOF, ""})
	close(l.tokens)
}
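
// lexAll is a usage sketch (a hypothetical helper, not part of the package's
// API): it drains the token stream until EOF and returns the tokens collected
// along the way. It assumes every token is consumed, which lets the lexing
// goroutine run to completion and close its channel.
func lexAll(input string) []token {
	l := lex(input)
	var tokens []token
	for {
		t := l.nextItem()
		if t.tokenType == tokenEOF {
			return tokens
		}
		tokens = append(tokens, t)
	}
}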