aboutsummaryrefslogtreecommitdiff
path: root/lex.go
diff options
context:
space:
mode:
Diffstat (limited to 'lex.go')
-rw-r--r--lex.go390
1 files changed, 390 insertions, 0 deletions
diff --git a/lex.go b/lex.go
new file mode 100644
index 0000000..5d7f0b1
--- /dev/null
+++ b/lex.go
@@ -0,0 +1,390 @@
+package main
+
+import (
+ "bufio"
+ "errors"
+ "io"
+ "os"
+ "strings"
+ "unicode"
+)
+
// unexpectedToken is the sentinel error readToken returns when the rune it
// read does not start any token it recognizes.
var unexpectedToken = errors.New("unexpected token")
+
+func lex(src *os.File, tokens chan<- token, errs chan<- error) {
+ defer close(tokens)
+
+ br := bufio.NewReader(src)
+ line := 1
+
+ for {
+ tok, err := readToken(br, &line)
+ if err != nil {
+ if err == io.EOF {
+ wg.Done()
+ return
+ }
+
+ errs <- err
+ return
+ }
+
+ if tok.kind != none {
+ tokens <- tok
+ }
+ }
+}
+
+func readToken(br *bufio.Reader, line *int) (token, error) {
+ r, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if r == '\n' {
+ *line++
+ return token{}, nil
+ }
+
+ if unicode.IsSpace(r) {
+ return token{}, nil
+ }
+
+ if unicode.IsLetter(r) {
+ return lexIdentifier(br, r, *line)
+ }
+
+ if unicode.IsDigit(r) {
+ return lexNumber(br, r, *line)
+ }
+
+ switch r {
+ case '+':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: plusEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: plus, line: *line}, nil
+ }
+ case '-':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: minusEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: minus, line: *line}, nil
+ }
+ case '*':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: starEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: star, line: *line}, nil
+ }
+ case '/':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: slashEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: slash, line: *line}, nil
+ }
+ case '(':
+ return token{kind: lparen, line: *line}, nil
+ case ')':
+ return token{kind: rparen, line: *line}, nil
+ case '[':
+ return token{kind: lbracket, line: *line}, nil
+ case ']':
+ return token{kind: rbracket, line: *line}, nil
+ case '{':
+ return token{kind: lbrace, line: *line}, nil
+ case '}':
+ return token{kind: rbrace, line: *line}, nil
+ case ',':
+ return token{kind: comma, line: *line}, nil
+ case '.':
+ return token{kind: dot, line: *line}, nil
+ case ':':
+ return token{kind: colon, line: *line}, nil
+ case ';':
+ return token{kind: semicolon, line: *line}, nil
+ case '=':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: doubleEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: equals, line: *line}, nil
+ }
+ case '!':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: bangEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: bang, line: *line}, nil
+ }
+ case '~':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: tildeEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: tilde, line: *line}, nil
+ }
+ case '^':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: caretEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: caret, line: *line}, nil
+ }
+ case '&':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ switch next {
+ case '&':
+ return token{kind: doubleAmpersand, line: *line}, nil
+ case '=': // TODO: <<=, >>=, |=
+ return token{kind: ampersandEquals, line: *line}, nil
+ default:
+ br.UnreadRune()
+ return token{kind: ampersand, line: *line}, nil
+ }
+ case '|':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ switch next {
+ case '|':
+ return token{kind: doublePipe, line: *line}, nil
+ case '=':
+ return token{kind: pipeEquals, line: *line}, nil
+ default:
+ br.UnreadRune()
+ return token{kind: pipe, line: *line}, nil
+ }
+ case '<':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ switch next {
+ case '<':
+ next, _, err = br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: doubleLangleEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: doubleLangle, line: *line}, nil
+ }
+ case '=':
+ return token{kind: langleEquals, line: *line}, nil
+ default:
+ br.UnreadRune()
+ return token{kind: langle, line: *line}, nil
+ }
+ case '>':
+ next, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ switch next {
+ case '>':
+ next, _, err = br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if next == '=' {
+ return token{kind: doubleRangleEquals, line: *line}, nil
+ } else {
+ br.UnreadRune()
+ return token{kind: doubleRangle, line: *line}, nil
+ }
+ case '=':
+ return token{kind: rangleEquals, line: *line}, nil
+ default:
+ br.UnreadRune()
+ return token{kind: rangle, line: *line}, nil
+ }
+ case '"':
+ return lexString(br, *line)
+ case '#':
+ // Discard comments.
+ return token{}, lexComment(br)
+ default:
+ return token{}, unexpectedToken
+ }
+}
+
+func lexIdentifier(br *bufio.Reader, first rune, line int) (token, error) {
+ var b strings.Builder
+ b.WriteRune(first)
+
+ for {
+ r, _, err := br.ReadRune()
+ if err != nil {
+ return token{}, err
+ }
+
+ if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
+ br.UnreadRune()
+ return addKeywordOrIdentifier(b.String(), line), nil
+ }
+
+ b.WriteRune(r)
+ }
+}
+
+func addKeywordOrIdentifier(name string, line int) token {
+ switch name {
+ case "export":
+ return token{kind: export, line: line}
+ case "func":
+ return token{kind: function, line: line}
+ case "return":
+ return token{kind: ret, line: line}
+ default:
+ return token{kind: identifier, value: name, line: line}
+ }
+}
+
+func lexString(br *bufio.Reader, line int) (token, error) {
+ s, err := br.ReadString('"')
+ if err != nil {
+ return token{}, err
+ }
+
+ return token{kind: str, value: s[:len(s)-1], line: line}, nil
+}
+
+func lexNumber(br *bufio.Reader, first rune, line int) (token, error) {
+ var b strings.Builder
+ b.WriteRune(first)
+
+ if first == '0' {
+ second, _, err := br.ReadRune()
+ if err != nil {
+ if err == io.EOF {
+ return token{
+ kind: number,
+ value: b.String(),
+ line: line,
+ }, nil
+ }
+
+ return token{}, err
+ }
+
+ if !isDigitOrBase(second) {
+ br.UnreadRune()
+ return token{
+ kind: number,
+ value: b.String(),
+ line: line,
+ }, nil
+ }
+
+ b.WriteRune(second)
+ }
+
+ for {
+ r, _, err := br.ReadRune()
+ if err != nil {
+ if err == io.EOF {
+ return token{
+ kind: number,
+ value: b.String(),
+ line: line,
+ }, nil
+ }
+
+ return token{}, err
+ }
+
+ if r == '_' {
+ continue
+ }
+
+ if !unicode.IsDigit(r) && r != '.' {
+ br.UnreadRune()
+ return token{
+ kind: number,
+ value: b.String(),
+ line: line,
+ }, nil
+ }
+
+ b.WriteRune(r)
+ }
+}
+
// lexComment skips the rest of a comment by reading from br up to and
// including the next newline. The text is discarded; only a read error
// (io.EOF when the input ends before a newline) is reported.
func lexComment(br *bufio.Reader) error {
	if _, err := br.ReadString('\n'); err != nil {
		return err
	}

	return nil
}
+
// isDigitOrBase reports whether r may directly follow a leading '0' in a
// numeric literal: either one of the base markers 'x', 'd', 'o', 'b' or any
// Unicode decimal digit.
func isDigitOrBase(r rune) bool {
	switch r {
	case 'x', 'd', 'o', 'b':
		return true
	}

	return unicode.IsDigit(r)
}
+
// isBase reports whether r is one of the lowercase base markers that may
// follow a leading '0' in a numeric literal: 'x', 'd', 'o' or 'b'.
func isBase(r rune) bool {
	switch r {
	case 'x', 'd', 'o', 'b':
		return true
	}

	return false
}