// SPDX-FileCopyrightText: 2024 Himbeer // // SPDX-License-Identifier: GPL-3.0-or-later package main import ( "bufio" "errors" "io" "os" "strings" "unicode" ) var unexpectedToken = errors.New("unexpected token") func lex(src *os.File, tokens chan<- token, errs chan<- error) { defer close(tokens) br := bufio.NewReader(src) line := 1 for { tok, err := readToken(br, &line) if err != nil { if err == io.EOF { return } errs <- err return } if tok.kind != none { tokens <- tok } } } func readToken(br *bufio.Reader, line *int) (token, error) { r, _, err := br.ReadRune() if err != nil { return token{}, err } if r == '\n' { *line++ return token{}, nil } if unicode.IsSpace(r) { return token{}, nil } if unicode.IsLetter(r) { return lexIdentifier(br, r, *line) } if unicode.IsDigit(r) { return lexNumber(br, r, *line) } switch r { case '+': next, _, err := br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: plusEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: plus, line: *line}, nil } case '-': next, _, err := br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: minusEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: minus, line: *line}, nil } case '*': next, _, err := br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: starEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: star, line: *line}, nil } case '/': next, _, err := br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: slashEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: slash, line: *line}, nil } case '%': next, _, err := br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: percentageEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: percentage, line: *line}, nil } case '(': return token{kind: lparen, line: *line}, nil case ')': return token{kind: rparen, line: *line}, nil case '[': return token{kind: lbracket, line: *line}, nil case ']': return token{kind: rbracket, line: *line}, nil case '{': return token{kind: lbrace, line: *line}, nil case '}': return token{kind: rbrace, line: *line}, nil case ',': return token{kind: comma, line: *line}, nil case '.': return token{kind: dot, line: *line}, nil case ':': return token{kind: colon, line: *line}, nil case ';': return token{kind: semicolon, line: *line}, nil case '=': next, _, err := br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: doubleEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: equals, line: *line}, nil } case '!': next, _, err := br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: bangEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: bang, line: *line}, nil } case '~': next, _, err := br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: tildeEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: tilde, line: *line}, nil } case '^': next, _, err := br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: caretEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: caret, line: *line}, nil } case '&': next, _, err := br.ReadRune() if err != nil { return token{}, err } switch next { case '&': return token{kind: doubleAmpersand, line: *line}, nil case '=': // TODO: <<=, >>=, |= return token{kind: ampersandEquals, line: *line}, nil default: br.UnreadRune() return token{kind: ampersand, line: *line}, nil } case '|': next, _, err := br.ReadRune() if err != nil { return token{}, err } switch next { case '|': return token{kind: doublePipe, line: *line}, nil case '=': return token{kind: pipeEquals, line: *line}, nil default: br.UnreadRune() return token{kind: pipe, line: *line}, nil } case '<': next, _, err := br.ReadRune() if err != nil { return token{}, err } switch next { case '<': next, _, err = br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: doubleLangleEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: doubleLangle, line: *line}, nil } case '=': return token{kind: langleEquals, line: *line}, nil default: br.UnreadRune() return token{kind: langle, line: *line}, nil } case '>': next, _, err := br.ReadRune() if err != nil { return token{}, err } switch next { case '>': next, _, err = br.ReadRune() if err != nil { return token{}, err } if next == '=' { return token{kind: doubleRangleEquals, line: *line}, nil } else { br.UnreadRune() return token{kind: doubleRangle, line: *line}, nil } case '=': return token{kind: rangleEquals, line: *line}, nil default: br.UnreadRune() return token{kind: rangle, line: *line}, nil } case '"': return lexString(br, *line) case '#': // Discard comments. *line++ return token{}, lexComment(br) default: return token{}, unexpectedToken } } func lexIdentifier(br *bufio.Reader, first rune, line int) (token, error) { var b strings.Builder b.WriteRune(first) for { r, _, err := br.ReadRune() if err != nil { return token{}, err } if !unicode.IsLetter(r) && !unicode.IsDigit(r) { br.UnreadRune() return addKeywordOrIdentifier(b.String(), line), nil } b.WriteRune(r) } } func addKeywordOrIdentifier(name string, line int) token { switch name { case "export": return token{kind: export, line: line} case "extern": return token{kind: extern, line: line} case "func": return token{kind: function, line: line} case "return": return token{kind: ret, line: line} case "const": return token{kind: constKeyword, line: line} case "mut": return token{kind: mut, line: line} default: return token{kind: identifier, value: name, line: line} } } func lexString(br *bufio.Reader, line int) (token, error) { s, err := br.ReadString('"') if err != nil { return token{}, err } return token{kind: str, value: s[:len(s)-1], line: line}, nil } func lexNumber(br *bufio.Reader, first rune, line int) (token, error) { var b strings.Builder b.WriteRune(first) if first == '0' { second, _, err := br.ReadRune() if err != nil { if err == io.EOF { return token{ kind: number, value: b.String(), line: line, }, nil } return token{}, err } if !isDigitOrBase(second) { br.UnreadRune() return token{ kind: number, value: b.String(), line: line, }, nil } b.WriteRune(second) } for { r, _, err := br.ReadRune() if err != nil { if err == io.EOF { return token{ kind: number, value: b.String(), line: line, }, nil } return token{}, err } if r == '_' { continue } if !unicode.IsDigit(r) && r != '.' { br.UnreadRune() return token{ kind: number, value: b.String(), line: line, }, nil } b.WriteRune(r) } } func lexComment(br *bufio.Reader) error { _, err := br.ReadString('\n') return err } func isDigitOrBase(r rune) bool { return unicode.IsDigit(r) || isBase(r) } func isBase(r rune) bool { switch r { case 'x': case 'd': case 'o': case 'b': default: return false } return true }