diff options
Diffstat (limited to 'lex.go')
-rw-r--r-- | lex.go | 390 |
1 files changed, 390 insertions, 0 deletions
@@ -0,0 +1,390 @@ +package main + +import ( + "bufio" + "errors" + "io" + "os" + "strings" + "unicode" +) + +var unexpectedToken = errors.New("unexpected token") + +func lex(src *os.File, tokens chan<- token, errs chan<- error) { + defer close(tokens) + + br := bufio.NewReader(src) + line := 1 + + for { + tok, err := readToken(br, &line) + if err != nil { + if err == io.EOF { + wg.Done() + return + } + + errs <- err + return + } + + if tok.kind != none { + tokens <- tok + } + } +} + +func readToken(br *bufio.Reader, line *int) (token, error) { + r, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if r == '\n' { + *line++ + return token{}, nil + } + + if unicode.IsSpace(r) { + return token{}, nil + } + + if unicode.IsLetter(r) { + return lexIdentifier(br, r, *line) + } + + if unicode.IsDigit(r) { + return lexNumber(br, r, *line) + } + + switch r { + case '+': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: plusEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: plus, line: *line}, nil + } + case '-': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: minusEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: minus, line: *line}, nil + } + case '*': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: starEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: star, line: *line}, nil + } + case '/': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: slashEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: slash, line: *line}, nil + } + case '(': + return token{kind: lparen, line: *line}, nil + case ')': + return token{kind: rparen, line: *line}, nil + case '[': + return token{kind: lbracket, line: *line}, nil + case ']': + return token{kind: rbracket, line: *line}, nil + case '{': + return token{kind: lbrace, line: *line}, nil + case '}': + return token{kind: rbrace, line: *line}, nil + case ',': + return token{kind: comma, line: *line}, nil + case '.': + return token{kind: dot, line: *line}, nil + case ':': + return token{kind: colon, line: *line}, nil + case ';': + return token{kind: semicolon, line: *line}, nil + case '=': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: doubleEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: equals, line: *line}, nil + } + case '!': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: bangEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: bang, line: *line}, nil + } + case '~': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: tildeEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: tilde, line: *line}, nil + } + case '^': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: caretEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: caret, line: *line}, nil + } + case '&': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + switch next { + case '&': + return token{kind: doubleAmpersand, line: *line}, nil + case '=': // TODO: <<=, >>=, |= + return token{kind: ampersandEquals, line: *line}, nil + default: + br.UnreadRune() + return token{kind: ampersand, line: *line}, nil + } + case '|': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + switch next { + case '|': + return token{kind: doublePipe, line: *line}, nil + case '=': + return token{kind: pipeEquals, line: *line}, nil + default: + br.UnreadRune() + return token{kind: pipe, line: *line}, nil + } + case '<': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + switch next { + case '<': + next, _, err = br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: doubleLangleEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: doubleLangle, line: *line}, nil + } + case '=': + return token{kind: langleEquals, line: *line}, nil + default: + br.UnreadRune() + return token{kind: langle, line: *line}, nil + } + case '>': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + switch next { + case '>': + next, _, err = br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: doubleRangleEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: doubleRangle, line: *line}, nil + } + case '=': + return token{kind: rangleEquals, line: *line}, nil + default: + br.UnreadRune() + return token{kind: rangle, line: *line}, nil + } + case '"': + return lexString(br, *line) + case '#': + // Discard comments. + return token{}, lexComment(br) + default: + return token{}, unexpectedToken + } +} + +func lexIdentifier(br *bufio.Reader, first rune, line int) (token, error) { + var b strings.Builder + b.WriteRune(first) + + for { + r, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + br.UnreadRune() + return addKeywordOrIdentifier(b.String(), line), nil + } + + b.WriteRune(r) + } +} + +func addKeywordOrIdentifier(name string, line int) token { + switch name { + case "export": + return token{kind: export, line: line} + case "func": + return token{kind: function, line: line} + case "return": + return token{kind: ret, line: line} + default: + return token{kind: identifier, value: name, line: line} + } +} + +func lexString(br *bufio.Reader, line int) (token, error) { + s, err := br.ReadString('"') + if err != nil { + return token{}, err + } + + return token{kind: str, value: s[:len(s)-1], line: line}, nil +} + +func lexNumber(br *bufio.Reader, first rune, line int) (token, error) { + var b strings.Builder + b.WriteRune(first) + + if first == '0' { + second, _, err := br.ReadRune() + if err != nil { + if err == io.EOF { + return token{ + kind: number, + value: b.String(), + line: line, + }, nil + } + + return token{}, err + } + + if !isDigitOrBase(second) { + br.UnreadRune() + return token{ + kind: number, + value: b.String(), + line: line, + }, nil + } + + b.WriteRune(second) + } + + for { + r, _, err := br.ReadRune() + if err != nil { + if err == io.EOF { + return token{ + kind: number, + value: b.String(), + line: line, + }, nil + } + + return token{}, err + } + + if r == '_' { + continue + } + + if !unicode.IsDigit(r) && r != '.' { + br.UnreadRune() + return token{ + kind: number, + value: b.String(), + line: line, + }, nil + } + + b.WriteRune(r) + } +} + +func lexComment(br *bufio.Reader) error { + _, err := br.ReadString('\n') + return err +} + +func isDigitOrBase(r rune) bool { + return unicode.IsDigit(r) || isBase(r) +} + +func isBase(r rune) bool { + switch r { + case 'x': + case 'd': + case 'o': + case 'b': + default: + return false + } + + return true +} |