diff options
author | Himbeer <himbeer@disroot.org> | 2024-08-30 09:17:43 +0200 |
---|---|---|
committer | Himbeer <himbeer@disroot.org> | 2024-08-30 09:17:43 +0200 |
commit | 3adfc2d52c29fd90257813f33b8286225f7adc04 (patch) | |
tree | 39e4d064af273fc1fc1e271a4f4a135a4227e102 |
Initial commit
-rw-r--r-- | GRAMMAR.txt | 23 | ||||
-rw-r--r-- | cer.go | 45 | ||||
-rw-r--r-- | expression.go | 251 | ||||
-rw-r--r-- | generate.go | 85 | ||||
-rw-r--r-- | go.mod | 3 | ||||
-rw-r--r-- | lex.go | 390 | ||||
-rw-r--r-- | op.go | 47 | ||||
-rw-r--r-- | parse.go | 766 | ||||
-rw-r--r-- | parse_error.go | 44 | ||||
-rw-r--r-- | token.go | 172 |
10 files changed, 1826 insertions, 0 deletions
diff --git a/GRAMMAR.txt b/GRAMMAR.txt new file mode 100644 index 0000000..a44e086 --- /dev/null +++ b/GRAMMAR.txt @@ -0,0 +1,23 @@ +root -> toplevel* +toplevel -> function + +function -> "func" IDENTIFIER "(" param? ")" IDENTIFIER block +param -> IDENTIFIER IDENTIFIER ( "," param )? + +block -> "{" ( statement )* "}" + +statement -> ( return ) ";" +return -> "return" expression + +expression -> equality +equality -> comparison ( ( "==" | "!=" ) comparison )* +comparison -> term ( ( "<" | "<=" | ">" | ">=" ) term )? +term -> numeral ( ( "<<" | ">>" ) numeral )* +numeral -> factor ( ( "+" | "-" ) factor )* +factor -> unary ( ( "*" | "/" ) unary )* +unary -> ( "-" | "!" | "~" )? primary +primary -> grouping | literal +grouping -> "(" expression ")" +literal -> string | number +string -> STRING* +number -> IDENTIFIER "(" NUMBER ")" @@ -0,0 +1,45 @@ +/* +cer-bootstrap is the bootstrapping compiler for the Cer programming +language. +*/ +package main + +import ( + "fmt" + "os" + "sync" +) + +var wg sync.WaitGroup + +func main() { + wg.Add(3) + + errs := make(chan error) + + go readErrs(errs) + + toks := make(chan token, 1024) + go lex(os.Stdin, toks, errs) + + // for token := range tokens { + // fmt.Println(token) + // } + + root := make(chan *rootExpr, 1) + go parse(&tokens{ + src: toks, + toks: make([]token, 0, len(toks)), + }, root, errs) + + // fmt.Printf("%+v\n", <-expr) + + go generate(<-root, os.Stdout, errs) + + wg.Wait() +} + +func readErrs(errs <-chan error) { + fmt.Fprintln(os.Stderr, <-errs) + os.Exit(1) +} diff --git a/expression.go b/expression.go new file mode 100644 index 0000000..0498d35 --- /dev/null +++ b/expression.go @@ -0,0 +1,251 @@ +package main + +import "fmt" + +type expression interface { + markExpr() + line() int +} + +type rootExpr struct { + toplevels []expression + ln int +} + +func (r *rootExpr) markExpr() {} + +func (r *rootExpr) line() int { return r.ln } + +type linkage int + +const ( + defaultLinkage linkage = iota + exportLinkage +) + +func (l linkage) String() string { + switch l { + case exportLinkage: + return "export" + } + + return "" +} + +type functionExpr struct { + link linkage + name string + params *paramExpr + returnType string + blk *blockExpr + ln int +} + +func (f *functionExpr) markExpr() {} + +func (f *functionExpr) line() int { return f.ln } + +type paramExpr struct { + name string + typ string + next *paramExpr + ln int +} + +func (p *paramExpr) markExpr() {} + +func (p *paramExpr) line() int { return p.ln } + +func (p *paramExpr) String() string { + if p == nil { + return "" + } + + s := fmt.Sprintf("%s %%%s", p.typ, p.name) + if p.next != nil { + s += p.next.String() + } + + return s +} + +type blockExpr struct { + stmts []statementExpr + ln int +} + +func (b *blockExpr) markExpr() {} + +func (b *blockExpr) line() int { return b.ln } + +type statementExpr interface { + expression + markStmt() +} + +type returnStmt struct { + value exprExpr + ln int +} + +func (r *returnStmt) markExpr() {} + +func (r *returnStmt) markStmt() {} + +func (r *returnStmt) line() int { return r.ln } + +type exprExpr interface { + expression + markExprExpr() +} + +type equalityExpr struct { + lhs *comparisonExpr + rhs []*equalityRhs + ln int +} + +func (e *equalityExpr) markExpr() {} + +func (e *equalityExpr) markExprExpr() {} + +func (e *equalityExpr) line() int { return e.ln } + +type equalityRhs struct { + op equalityOp + value *comparisonExpr +} + +type comparisonExpr struct { + lhs *termExpr + rhs *comparisonRhs + ln int +} + +func (c *comparisonExpr) markExpr() {} + +func (c *comparisonExpr) markExprExpr() {} + +func (c *comparisonExpr) line() int { return c.ln } + +type comparisonRhs struct { + op comparisonOp + value *termExpr +} + +type termExpr struct { + lhs *numeralExpr + rhs []*termRhs + ln int +} + +func (t *termExpr) markExpr() {} + +func (t *termExpr) markExprExpr() {} + +func (t *termExpr) line() int { return t.ln } + +type termRhs struct { + op shiftOp + value *numeralExpr +} + +type numeralExpr struct { + lhs *factorExpr + rhs []*numeralRhs + ln int +} + +func (n *numeralExpr) markExpr() {} + +func (n *numeralExpr) markExprExpr() {} + +func (n *numeralExpr) line() int { return n.ln } + +type numeralRhs struct { + op addSubOp + value *factorExpr +} + +type factorExpr struct { + lhs *unaryExpr + rhs []*factorRhs + ln int +} + +func (f *factorExpr) markExpr() {} + +func (f *factorExpr) markExprExpr() {} + +func (f *factorExpr) line() int { return f.ln } + +type factorRhs struct { + op mulDivOp + value *unaryExpr +} + +type unaryExpr struct { + op unaryOp + value primaryExpr + ln int +} + +func (u *unaryExpr) markExpr() {} + +func (u *unaryExpr) markExprExpr() {} + +func (u *unaryExpr) line() int { return u.ln } + +type primaryExpr interface { + exprExpr + markPrimaryExpr() +} + +type literalExpr interface { + primaryExpr + markLiteralExpr() +} + +type stringExpr struct { + segments []string + ln int +} + +func (s *stringExpr) markExpr() {} + +func (s *stringExpr) markExprExpr() {} + +func (s *stringExpr) markPrimaryExpr() {} + +func (s *stringExpr) markLiteralExpr() {} + +func (s *stringExpr) line() int { return s.ln } + +type numberExpr struct { + typ string + s string + ln int +} + +func (n *numberExpr) markExpr() {} + +func (n *numberExpr) markExprExpr() {} + +func (n *numberExpr) markPrimaryExpr() {} + +func (n *numberExpr) markLiteralExpr() {} + +func (n *numberExpr) line() int { return n.ln } + +type groupingExpr struct { + inner exprExpr + ln int +} + +func (g *groupingExpr) markExpr() {} + +func (g *groupingExpr) markExprExpr() {} + +func (g *groupingExpr) markPrimaryExpr() {} + +func (g *groupingExpr) line() int { return g.ln } diff --git a/generate.go b/generate.go new file mode 100644 index 0000000..75da82e --- /dev/null +++ b/generate.go @@ -0,0 +1,85 @@ +package main + +import ( + "fmt" + "io" +) + +type invalidToplevel struct { + got expression +} + +func (i invalidToplevel) Error() string { + return fmt.Sprintf("%d: expected top-level declaration, got %s", i.got.line(), i.got) +} + +func generate(root *rootExpr, w io.Writer, errs chan<- error) { + defer wg.Done() + + for _, toplevel := range root.toplevels { + if err := generateToplevel(toplevel, w); err != nil { + errs <- err + } + } +} + +func generateToplevel(toplevel expression, w io.Writer) error { + switch expr := toplevel.(type) { + case *functionExpr: + return generateFunction(expr, w) + } + + return invalidToplevel{got: toplevel} +} + +func generateFunction(function *functionExpr, w io.Writer) error { + if function.link != defaultLinkage { + fmt.Fprintf(w, "%s ", function.link) + } + + fmt.Fprintf(w, "function %s $%s", function.returnType, function.name) + fmt.Fprintf(w, "(%s) {\n", function.params) + fmt.Fprintln(w, "@start") + if err := generateBlock(function.blk, w); err != nil { + return err + } + fmt.Fprintf(w, "}\n") + + return nil +} + +func generateBlock(blk *blockExpr, w io.Writer) error { + for _, stmt := range blk.stmts { + if err := generateStatement(stmt, w); err != nil { + return err + } + } + + return nil +} + +func generateStatement(stmt statementExpr, w io.Writer) error { + switch s := stmt.(type) { + case *returnStmt: + return generateReturnStmt(s, w) + } + + return nil +} + +func generateReturnStmt(stmt *returnStmt, w io.Writer) error { + eq := stmt.value.(*equalityExpr) + cmp := eq.lhs + trm := cmp.lhs + num := trm.lhs + fct := num.lhs + un := fct.lhs + prm := un.value + lit := prm.(literalExpr) + n := lit.(*numberExpr) + value := n.s + + fmt.Fprintln(w, "ret", value) + + return nil +} @@ -0,0 +1,3 @@ +module codeberg.org/Himbeer/cer-bootstrap + +go 1.21.6 @@ -0,0 +1,390 @@ +package main + +import ( + "bufio" + "errors" + "io" + "os" + "strings" + "unicode" +) + +var unexpectedToken = errors.New("unexpected token") + +func lex(src *os.File, tokens chan<- token, errs chan<- error) { + defer close(tokens) + + br := bufio.NewReader(src) + line := 1 + + for { + tok, err := readToken(br, &line) + if err != nil { + if err == io.EOF { + wg.Done() + return + } + + errs <- err + return + } + + if tok.kind != none { + tokens <- tok + } + } +} + +func readToken(br *bufio.Reader, line *int) (token, error) { + r, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if r == '\n' { + *line++ + return token{}, nil + } + + if unicode.IsSpace(r) { + return token{}, nil + } + + if unicode.IsLetter(r) { + return lexIdentifier(br, r, *line) + } + + if unicode.IsDigit(r) { + return lexNumber(br, r, *line) + } + + switch r { + case '+': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: plusEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: plus, line: *line}, nil + } + case '-': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: minusEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: minus, line: *line}, nil + } + case '*': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: starEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: star, line: *line}, nil + } + case '/': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: slashEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: slash, line: *line}, nil + } + case '(': + return token{kind: lparen, line: *line}, nil + case ')': + return token{kind: rparen, line: *line}, nil + case '[': + return token{kind: lbracket, line: *line}, nil + case ']': + return token{kind: rbracket, line: *line}, nil + case '{': + return token{kind: lbrace, line: *line}, nil + case '}': + return token{kind: rbrace, line: *line}, nil + case ',': + return token{kind: comma, line: *line}, nil + case '.': + return token{kind: dot, line: *line}, nil + case ':': + return token{kind: colon, line: *line}, nil + case ';': + return token{kind: semicolon, line: *line}, nil + case '=': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: doubleEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: equals, line: *line}, nil + } + case '!': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: bangEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: bang, line: *line}, nil + } + case '~': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: tildeEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: tilde, line: *line}, nil + } + case '^': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: caretEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: caret, line: *line}, nil + } + case '&': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + switch next { + case '&': + return token{kind: doubleAmpersand, line: *line}, nil + case '=': // TODO: <<=, >>=, |= + return token{kind: ampersandEquals, line: *line}, nil + default: + br.UnreadRune() + return token{kind: ampersand, line: *line}, nil + } + case '|': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + switch next { + case '|': + return token{kind: doublePipe, line: *line}, nil + case '=': + return token{kind: pipeEquals, line: *line}, nil + default: + br.UnreadRune() + return token{kind: pipe, line: *line}, nil + } + case '<': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + switch next { + case '<': + next, _, err = br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: doubleLangleEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: doubleLangle, line: *line}, nil + } + case '=': + return token{kind: langleEquals, line: *line}, nil + default: + br.UnreadRune() + return token{kind: langle, line: *line}, nil + } + case '>': + next, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + switch next { + case '>': + next, _, err = br.ReadRune() + if err != nil { + return token{}, err + } + + if next == '=' { + return token{kind: doubleRangleEquals, line: *line}, nil + } else { + br.UnreadRune() + return token{kind: doubleRangle, line: *line}, nil + } + case '=': + return token{kind: rangleEquals, line: *line}, nil + default: + br.UnreadRune() + return token{kind: rangle, line: *line}, nil + } + case '"': + return lexString(br, *line) + case '#': + // Discard comments. + return token{}, lexComment(br) + default: + return token{}, unexpectedToken + } +} + +func lexIdentifier(br *bufio.Reader, first rune, line int) (token, error) { + var b strings.Builder + b.WriteRune(first) + + for { + r, _, err := br.ReadRune() + if err != nil { + return token{}, err + } + + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + br.UnreadRune() + return addKeywordOrIdentifier(b.String(), line), nil + } + + b.WriteRune(r) + } +} + +func addKeywordOrIdentifier(name string, line int) token { + switch name { + case "export": + return token{kind: export, line: line} + case "func": + return token{kind: function, line: line} + case "return": + return token{kind: ret, line: line} + default: + return token{kind: identifier, value: name, line: line} + } +} + +func lexString(br *bufio.Reader, line int) (token, error) { + s, err := br.ReadString('"') + if err != nil { + return token{}, err + } + + return token{kind: str, value: s[:len(s)-1], line: line}, nil +} + +func lexNumber(br *bufio.Reader, first rune, line int) (token, error) { + var b strings.Builder + b.WriteRune(first) + + if first == '0' { + second, _, err := br.ReadRune() + if err != nil { + if err == io.EOF { + return token{ + kind: number, + value: b.String(), + line: line, + }, nil + } + + return token{}, err + } + + if !isDigitOrBase(second) { + br.UnreadRune() + return token{ + kind: number, + value: b.String(), + line: line, + }, nil + } + + b.WriteRune(second) + } + + for { + r, _, err := br.ReadRune() + if err != nil { + if err == io.EOF { + return token{ + kind: number, + value: b.String(), + line: line, + }, nil + } + + return token{}, err + } + + if r == '_' { + continue + } + + if !unicode.IsDigit(r) && r != '.' { + br.UnreadRune() + return token{ + kind: number, + value: b.String(), + line: line, + }, nil + } + + b.WriteRune(r) + } +} + +func lexComment(br *bufio.Reader) error { + _, err := br.ReadString('\n') + return err +} + +func isDigitOrBase(r rune) bool { + return unicode.IsDigit(r) || isBase(r) +} + +func isBase(r rune) bool { + switch r { + case 'x': + case 'd': + case 'o': + case 'b': + default: + return false + } + + return true +} @@ -0,0 +1,47 @@ +package main + +type equalityOp int + +const ( + equalTo equalityOp = iota + notEqualTo +) + +type comparisonOp int + +const ( + lessThan comparisonOp = iota + lessThanOrEqualTo + greaterThan + greaterThanOrEqualTo +) + +type shiftOp int + +const ( + shiftLeft shiftOp = iota + shiftRight +) + +type addSubOp int + +const ( + add addSubOp = iota + subtract +) + +type mulDivOp int + +const ( + multiply mulDivOp = iota + divide +) + +type unaryOp int + +const ( + unaryIdentity unaryOp = iota + negate + invertLogical + invertBits +) diff --git a/parse.go b/parse.go new file mode 100644 index 0000000..2abd111 --- /dev/null +++ b/parse.go @@ -0,0 +1,766 @@ +package main + +import ( + "errors" +) + +var unexpectedEOF = errors.New("unexpected EOF") + +func (t *tokens) consumeToken() (token, bool) { + if t.index >= len(t.toks) { + tok, ok := <-t.src + if !ok { + return token{}, false + } + t.toks = append(t.toks, tok) + } + tok := t.toks[t.index] + t.index++ + return tok, true +} + +func (t *tokens) unreadToken() { + t.index-- +} + +func (t *tokens) current() token { + return t.toks[t.index-1] +} + +func (t *tokens) eof() bool { + _, ok := t.consumeToken() + if ok { + t.unreadToken() + } + return !ok +} + +func (t *tokens) match(kind tokenKind) (bool, error) { + tok, ok := t.consumeToken() + if !ok { + return false, unexpectedEOF + } + return tok.kind == kind, nil +} + +func (t *tokens) mustMatch(kind tokenKind) error { + ok, err := t.match(kind) + if err != nil { + return err + } + + if !ok { + return expected{want: kind, got: t.current()} + } + + return nil +} + +func parse(toks *tokens, root chan<- *rootExpr, errs chan<- error) { + defer wg.Done() + root <- parseRoot(toks, errs) +} + +func parseRoot(toks *tokens, errs chan<- error) *rootExpr { + toplevels := make([]expression, 0) + for !toks.eof() { + if toplevel := parseToplevel(toks, errs); toplevel != nil { + toplevels = append(toplevels, toplevel) + } + } + + return &rootExpr{ + toplevels: toplevels, + } +} + +func parseToplevel(toks *tokens, errs chan<- error) expression { + function := parseFunction(toks, errs) + if function == nil { + errs <- expectedToplevel{got: toks.current()} + return nil + } + + return function +} + +func parseFunction(toks *tokens, errs chan<- error) *functionExpr { + var link linkage + + ok, err := toks.match(export) + if err != nil { + errs <- err + return nil + } + if ok { + link = exportLinkage + } else { + toks.unreadToken() + } + + ok, err = toks.match(function) + if err != nil { + errs <- err + return nil + } + if !ok { + return nil + } + + nameTok, ok := toks.consumeToken() + if !ok { + errs <- unexpectedEOF + return nil + } + if nameTok.kind != identifier { + errs <- expected{want: identifier, got: nameTok} + return nil + } + + if err := toks.mustMatch(lparen); err != nil { + errs <- err + return nil + } + + var params *paramExpr + + ok, err = toks.match(rparen) + if err != nil { + errs <- err + return nil + } + if !ok { + toks.unreadToken() + + params, err = parseParam(toks) + if err != nil { + errs <- err + return nil + } + + if err := toks.mustMatch(rparen); err != nil { + errs <- err + return nil + } + } + + returnTypeTok, ok := toks.consumeToken() + if !ok { + errs <- unexpectedEOF + return nil + } + if returnTypeTok.kind != identifier { + errs <- expected{want: identifier, got: returnTypeTok} + return nil + } + + blk, err := parseBlock(toks) + if err != nil { + errs <- err + return nil + } + + return &functionExpr{ + link: link, + name: nameTok.value, + params: params, + returnType: returnTypeTok.value, + blk: blk, + } +} + +func parseParam(toks *tokens) (*paramExpr, error) { + nameTok, ok := toks.consumeToken() + if !ok { + return nil, unexpectedEOF + } + if nameTok.kind != identifier { + return nil, expected{want: identifier, got: nameTok} + } + + typeTok, ok := toks.consumeToken() + if !ok { + return nil, unexpectedEOF + } + if typeTok.kind != identifier { + return nil, expected{want: identifier, got: typeTok} + } + + var next *paramExpr + + ok, err := toks.match(comma) + if err != nil { + return nil, err + } + if ok { + next, err = parseParam(toks) + if err != nil { + return nil, err + } + } else { + toks.unreadToken() + } + + return ¶mExpr{ + name: nameTok.value, + typ: typeTok.value, + next: next, + }, nil +} + +func parseBlock(toks *tokens) (*blockExpr, error) { + if err := toks.mustMatch(lbrace); err != nil { + return nil, err + } + + var expectStmt bool + stmts := make([]statementExpr, 0) + + stmt, err := parseStatement(toks) + if err != nil { + return nil, err + } + if stmt != nil { + stmts = append(stmts, stmt) + } else { + expectStmt = true + } + + for stmt != nil { + stmt, err = parseStatement(toks) + if err != nil { + return nil, err + } + if stmt == nil { + expectStmt = true + break + } + + stmts = append(stmts, stmt) + } + + if err := toks.mustMatch(rbrace); err != nil { + if expectStmt { + return nil, expectedStatement{got: toks.current()} + } + + return nil, err + } + + return &blockExpr{stmts: stmts}, nil +} + +func parseStatement(toks *tokens) (statementExpr, error) { + stmt, err := parseReturnStmt(toks) + if err != nil { + return nil, err + } + if stmt != nil { + if err := toks.mustMatch(semicolon); err != nil { + return nil, err + } + + return stmt, nil + } + + return nil, nil +} + +func parseReturnStmt(toks *tokens) (*returnStmt, error) { + ok, err := toks.match(ret) + if err != nil { + return nil, err + } + if !ok { + toks.unreadToken() + return nil, nil + } + + expr, err := parseExpression(toks) + if err != nil { + return nil, err + } + + return &returnStmt{value: expr, ln: toks.current().line}, nil +} + +func parseExpression(toks *tokens) (exprExpr, error) { + return parseEquality(toks) +} + +func parseEquality(toks *tokens) (*equalityExpr, error) { + lhs, err := parseComparison(toks) + if err != nil { + return nil, err + } + + line := toks.current().line + + rhss := make([]*equalityRhs, 0) + for { + rhs, err := parseEqualityRhs(toks) + if err != nil { + return nil, err + } + if rhs == nil { + break + } + + rhss = append(rhss, rhs) + } + + return &equalityExpr{lhs: lhs, rhs: rhss, ln: line}, nil +} + +func parseEqualityRhs(toks *tokens) (*equalityRhs, error) { + eq, err := toks.match(doubleEquals) + if err != nil { + return nil, err + } + if !eq { + toks.unreadToken() + } + + neq, err := toks.match(bangEquals) + if err != nil { + return nil, err + } + if !neq { + toks.unreadToken() + return nil, nil + } + + value, err := parseComparison(toks) + if err != nil { + return nil, err + } + + var op equalityOp + if eq { + op = equalTo + } else if neq { + op = notEqualTo + } + + return &equalityRhs{op: op, value: value}, nil +} + +func parseComparison(toks *tokens) (*comparisonExpr, error) { + lhs, err := parseTerm(toks) + if err != nil { + return nil, err + } + + line := toks.current().line + + rhs, err := parseComparisonRhs(toks) + if err != nil { + return nil, err + } + + return &comparisonExpr{lhs: lhs, rhs: rhs, ln: line}, nil +} + +func parseComparisonRhs(toks *tokens) (*comparisonRhs, error) { + lt, err := toks.match(langle) + if err != nil { + return nil, err + } + if !lt { + toks.unreadToken() + } + + le, err := toks.match(langleEquals) + if err != nil { + return nil, err + } + if !le { + toks.unreadToken() + } + + gt, err := toks.match(rangle) + if err != nil { + return nil, err + } + if !gt { + toks.unreadToken() + } + + ge, err := toks.match(rangleEquals) + if err != nil { + return nil, err + } + if !ge { + toks.unreadToken() + return nil, nil + } + + value, err := parseTerm(toks) + if err != nil { + return nil, err + } + + var op comparisonOp + if lt { + op = lessThan + } else if le { + op = lessThanOrEqualTo + } else if gt { + op = greaterThan + } else if ge { + op = greaterThanOrEqualTo + } + + return &comparisonRhs{op: op, value: value}, nil +} + +func parseTerm(toks *tokens) (*termExpr, error) { + lhs, err := parseNumeral(toks) + if err != nil { + return nil, err + } + line := toks.current().line + + rhss := make([]*termRhs, 0) + for { + rhs, err := parseTermRhs(toks) + if err != nil { + return nil, err + } + if rhs == nil { + break + } + + rhss = append(rhss, rhs) + } + + return &termExpr{lhs: lhs, rhs: rhss, ln: line}, nil +} + +func parseTermRhs(toks *tokens) (*termRhs, error) { + lsh, err := toks.match(doubleLangle) + if err != nil { + return nil, err + } + if !lsh { + toks.unreadToken() + } + + rsh, err := toks.match(doubleRangle) + if err != nil { + return nil, err + } + if !rsh { + toks.unreadToken() + return nil, nil + } + + value, err := parseNumeral(toks) + if err != nil { + return nil, err + } + + var op shiftOp + if lsh { + op = shiftLeft + } else if rsh { + op = shiftRight + } + + return &termRhs{op: op, value: value}, nil +} + +func parseNumeral(toks *tokens) (*numeralExpr, error) { + lhs, err := parseFactor(toks) + if err != nil { + return nil, err + } + + line := toks.current().line + + rhss := make([]*numeralRhs, 0) + for { + rhs, err := parseNumeralRhs(toks) + if err != nil { + return nil, err + } + if rhs == nil { + break + } + + rhss = append(rhss, rhs) + } + + return &numeralExpr{lhs: lhs, rhs: rhss, ln: line}, nil +} + +func parseNumeralRhs(toks *tokens) (*numeralRhs, error) { + doAdd, err := toks.match(plus) + if err != nil { + return nil, err + } + var doSubtract bool + if !doAdd { + toks.unreadToken() + + doSubtract, err = toks.match(minus) + if err != nil { + return nil, err + } + + if !doSubtract { + toks.unreadToken() + return nil, nil + } + } + + value, err := parseFactor(toks) + if err != nil { + return nil, err + } + + var op addSubOp + if doAdd { + op = add + } else if doSubtract { + op = subtract + } + + return &numeralRhs{op: op, value: value}, nil +} + +func parseFactor(toks *tokens) (*factorExpr, error) { + lhs, err := parseUnary(toks) + if err != nil { + return nil, err + } + + line := toks.current().line + + rhss := make([]*factorRhs, 0) + for { + rhs, err := parseFactorRhs(toks) + if err != nil { + return nil, err + } + if rhs == nil { + break + } + + rhss = append(rhss, rhs) + } + + return &factorExpr{lhs: lhs, rhs: rhss, ln: line}, nil +} + +func parseFactorRhs(toks *tokens) (*factorRhs, error) { + mul, err := toks.match(star) + if err != nil { + return nil, err + } + var div bool + if !mul { + toks.unreadToken() + + div, err = toks.match(slash) + if err != nil { + return nil, err + } + if !div { + toks.unreadToken() + return nil, nil + } + } + + value, err := parseUnary(toks) + if err != nil { + return nil, err + } + + var op mulDivOp + if mul { + op = multiply + } else if div { + op = divide + } + + return &factorRhs{op: op, value: value}, nil +} + +func parseUnary(toks *tokens) (*unaryExpr, error) { + neg, err := toks.match(minus) + if err != nil { + return nil, err + } + line := toks.current().line + if !neg { + toks.unreadToken() + } + + invLog, err := toks.match(bang) + if err != nil { + return nil, err + } + if !invLog { + toks.unreadToken() + } + + invBits, err := toks.match(tilde) + if err != nil { + return nil, err + } + if !invBits { + toks.unreadToken() + } + + value, err := parsePrimary(toks) + if err != nil { + return nil, err + } + + var op unaryOp + if neg { + op = negate + } else if invLog { + op = invertLogical + } else if invBits { + op = invertBits + } + + return &unaryExpr{op: op, value: value, ln: line}, nil +} + +func parsePrimary(toks *tokens) (primaryExpr, error) { + grp, err := parseGrouping(toks) + if err != nil { + return nil, err + } + if grp != nil { + return grp, nil + } + + lit, err := parseLiteral(toks) + if err != nil { + return nil, err + } + if lit != nil { + return lit, nil + } + + return nil, expectedPrimary{got: toks.current()} +} + +func parseGrouping(toks *tokens) (*groupingExpr, error) { + ok, err := toks.match(lparen) + if err != nil { + return nil, err + } + if !ok { + toks.unreadToken() + return nil, nil + } + + line := toks.current().line + + expr, err := parseExpression(toks) + if err != nil { + return nil, err + } + + if err := toks.mustMatch(rparen); err != nil { + return nil, err + } + + return &groupingExpr{inner: expr, ln: line}, nil +} + +func parseLiteral(toks *tokens) (literalExpr, error) { + s, err := parseString(toks) + if err != nil { + return nil, err + } + if s != nil { + return s, nil + } + + num, err := parseNumber(toks) + if err != nil { + return nil, err + } + if num != nil { + return num, nil + } + + return nil, nil +} + +func parseString(toks *tokens) (*stringExpr, error) { + tok, ok := toks.consumeToken() + if !ok { + return nil, unexpectedEOF + } + toks.unreadToken() + + segments := make([]string, 0) + for { + segment, err := parseStringSegment(toks) + if err != nil { + return nil, err + } + if segment == nil { + break + } + + segments = append(segments, *segment) + } + + if len(segments) == 0 { + return nil, nil + } + + return &stringExpr{segments: segments, ln: tok.line}, nil +} + +func parseStringSegment(toks *tokens) (*string, error) { + tok, ok := toks.consumeToken() + if !ok { + return nil, unexpectedEOF + } + + if tok.kind != str { + toks.unreadToken() + return nil, nil + } + + return &tok.value, nil +} + +func parseNumber(toks *tokens) (*numberExpr, error) { + typeTok, ok := toks.consumeToken() + if !ok { + return nil, unexpectedEOF + } + + if typeTok.kind != identifier { + return nil, expected{want: identifier, got: typeTok} + } + + if err := toks.mustMatch(lparen); err != nil { + return nil, err + } + + numTok, ok := toks.consumeToken() + if !ok { + return nil, unexpectedEOF + } + + if numTok.kind != number { + return nil, expected{want: number, got: numTok} + } + + if err := toks.mustMatch(rparen); err != nil { + return nil, err + } + + return &numberExpr{typ: typeTok.value, s: numTok.value, ln: typeTok.line}, nil +} diff --git a/parse_error.go b/parse_error.go new file mode 100644 index 0000000..9bd778f --- /dev/null +++ b/parse_error.go @@ -0,0 +1,44 @@ +package main + +import "fmt" + +type expected struct { + want tokenKind + got token +} + +func (e expected) Error() string { + return fmt.Sprintf("%d: expected %s, got %s", e.got.line, e.want, e.got) +} + +type expectedToplevel struct { + got token +} + +func (e expectedToplevel) Error() string { + return fmt.Sprintf("%d: expected top-level declaration, got %s", e.got.line, e.got) +} + +type expectedStatement struct { + got token +} + +func (e expectedStatement) Error() string { + return fmt.Sprintf("%d: expected statement, got %s", e.got.line, e.got) +} + +type expectedUnaryOperand struct { + got token +} + +func (e expectedUnaryOperand) Error() string { + return fmt.Sprintf("%d: expected unary operand, got %s", e.got.line, e.got) +} + +type expectedPrimary struct { + got token +} + +func (e expectedPrimary) Error() string { + return fmt.Sprintf("%d: expected primary expression, got %s", e.got.line, e.got) +} diff --git a/token.go b/token.go new file mode 100644 index 0000000..a33a554 --- /dev/null +++ b/token.go @@ -0,0 +1,172 @@ +package main + +type tokenKind int + +func (k tokenKind) String() string { + switch k { + case plusEquals: + return "+=" + case plus: + return "+" + case minusEquals: + return "-=" + case minus: + return "-" + case starEquals: + return "*=" + case star: + return "*" + case slashEquals: + return "/=" + case slash: + return "/" + case lparen: + return "(" + case rparen: + return ")" + case lbracket: + return "[" + case rbracket: + return "]" + case lbrace: + return "{" + case rbrace: + return "}" + case comma: + return "," + case dot: + return "." + case colon: + return ":" + case semicolon: + return ";" + case doubleEquals: + return "==" + case equals: + return "=" + case bangEquals: + return "!=" + case bang: + return "!" + case tildeEquals: + return "~=" + case tilde: + return "~" + case caretEquals: + return "^=" + case caret: + return "^" + case doubleAmpersand: + return "&&" + case ampersandEquals: + return "&=" + case ampersand: + return "&" + case doublePipe: + return "||" + case pipeEquals: + return "|=" + case pipe: + return "|" + case doubleLangleEquals: + return "<<=" + case doubleLangle: + return "<<" + case langleEquals: + return "<=" + case langle: + return "<" + case doubleRangleEquals: + return ">>=" + case doubleRangle: + return ">>" + case rangleEquals: + return ">=" + case rangle: + return ">" + case identifier: + return "identifier" + case str: + return "string" + case number: + return "number" + case export: + return "export" + case function: + return "func" + case ret: + return "return" + } + + return "invalid token" +} + +const ( + none tokenKind = iota + plusEquals + plus + minusEquals + minus + starEquals + star + slashEquals + slash + lparen + rparen + lbracket + rbracket + lbrace + rbrace + comma + dot + colon + semicolon + doubleEquals + equals + bangEquals + bang + tildeEquals + tilde + caretEquals + caret + doubleAmpersand + ampersandEquals + ampersand + doublePipe + pipeEquals + pipe + doubleLangleEquals + doubleLangle + langleEquals + langle + doubleRangleEquals + doubleRangle + rangleEquals + rangle + identifier + str + number + export + function + ret +) + +type token struct { + kind tokenKind + value string + line int +} + +func (t token) String() string { + if t.value != "" { + return t.kind.String() + " " + t.value + } else { + return t.kind.String() + } +} + +type tokens struct { + src <-chan token + toks []token + index int +} |