diff options
author | Himbeer <himbeer@disroot.org> | 2025-05-06 11:53:56 +0200 |
---|---|---|
committer | Himbeer <himbeer@disroot.org> | 2025-05-06 11:53:56 +0200 |
commit | 773d0876aca8ddcc3e37caca8a63f62249d0b3d9 (patch) | |
tree | 5257f1491529bc62ed8c7dc0c0d8d9b3ba188c4f | |
parent | 1d19bd1c66f8aa68848404a4b724dc3ca795b0da (diff) |
Implement toplevel parsing
-rw-r--r-- | include/util.h | 5 | ||||
-rw-r--r-- | src/parse.c | 394 |
2 files changed, 371 insertions, 28 deletions
diff --git a/include/util.h b/include/util.h index 0e2b289..70ae619 100644 --- a/include/util.h +++ b/include/util.h @@ -11,11 +11,6 @@ enum exit_status { EXIT_ABNORMAL = 255, }; -struct path { - const char **segments; - int segsz, seglen; -}; - void *must_malloc(size_t size); void *must_calloc(size_t nmemb, size_t size); void *must_realloc(void *ptr, size_t size); diff --git a/src/parse.c b/src/parse.c index 3d01562..a9d99b0 100644 --- a/src/parse.c +++ b/src/parse.c @@ -10,7 +10,7 @@ static noreturn void error(struct location loc, const char *fmt, ...) { - fprintf(stderr, "%s:%d:%d ", loc.file, loc.line, loc.column); + fprintf(stderr, "%s:%d:%d: ", loc.file, loc.line, loc.column); va_list ap; va_start(ap, fmt); @@ -21,44 +21,392 @@ error(struct location loc, const char *fmt, ...) exit(EXIT_PARSE); } -static void -append(struct path *path, const char *name) +static int +parse_include(struct lexer *lexer, struct ast_include *inc) { - if (path->seglen >= path->segsz) { - path->segsz *= 2; - size_t sz = sizeof(const char *) * path->segsz; - path->segments = must_realloc(path->segments, sz); + inc->loc = lex_loc(lexer); + + if (!match(lexer, T_INCLUDE)) { + return 0; + } + + struct token pathtok; + if (lex(lexer, &pathtok) != T_STRING) { + error(pathtok.loc, "expected string literal after 'include'"); + } + inc->path = pathtok.info.str; + + if (!match(lexer, T_SEMICOLON)) { + error(lex_loc(lexer), "expected semicolon"); + } + + return 1; +} + +static int +parse_global(struct lexer *lexer, struct ast_global *g) +{ + g->loc = lex_loc(lexer); + + if (!match(lexer, T_VAR)) { + return 0; + } + + struct token nametok; + if (lex(lexer, &nametok) != T_NAME) { + error(nametok.loc, "expected variable name after 'var'"); + } + g->name = nametok.info.str; + + g->ispub = match(lexer, T_PUB); + g->isextern = match(lexer, T_EXTERN); + + if (!parse_type(lexer, &g->type)) { + error(lex_loc(lexer), "expected type"); + } + + if (!match(lexer, T_SEMICOLON)) { + error(lex_loc(lexer), "expected semicolon"); + } + + return 1; +} + +static int +parse_const(struct lexer *lexer, struct ast_const *c) +{ + c->loc = lex_loc(lexer); + + if (!match(lexer, T_CONST)) { + return 0; + } + + struct token nametok; + if (lex(lexer, &nametok) != T_NAME) { + error(nametok.loc, "expected constant name after 'const'"); + } + c->name = nametok.info.str; + + c->ispub = match(lexer, T_PUB); + + if (!parse_expr(lexer, &c->value)) { + error(lex_loc(lexer), "expected expression"); + } + + if (!match(lexer, T_SEMICOLON)) { + error(lex_loc(lexer), "expected semicolon"); + } + + return 1; +} + +static int +parse_enumvar(struct lexer *lexer, struct ast_enumvar *ev) +{ + ev->loc = lex_loc(lexer); + + struct token nametok; + if (lex(lexer, &nametok) != T_NAME) { + unlex(lexer, &nametok); + return 0; + } + ev->name = nametok.info.str; + + ev->override = NULL; + if (!match(lexer, T_ASSIGN)) { + return 1; + } + ev->override = must_malloc(sizeof(struct expr)); + + if (!parse_expr(lexer, ev->override)) { + error(lex_loc(lexer), "expected expression after '=' in enum variant"); + } + + return 1; +} + +static int +parse_enum(struct lexer *lexer, struct ast_enum *en) +{ + en->loc = lex_loc(lexer); + + if (!match(lexer, T_ENUM)) { + return 0; + } + + if (!parse_expr(lexer, &en->init)) { + error(lex_loc(lexer), "expected default initialization expression"); + } + + en->ispub = match(lexer, T_PUB); + + if (!match(lexer, T_LBRACE)) { + error(lex_loc(lexer), "expected '{'"); + } + + en->variants = must_calloc(1, sizeof(struct ast_enumvar)); + en->variantsz = 1; + en->variantlen = 0; + + struct ast_enumvar v; + while (parse_enumvar(lexer, &v)) { + if (!match(lexer, T_COMMA)) { + error(lex_loc(lexer), "expected (trailing) comma after enum variant"); + } + + if (en->variantlen >= en->variantsz) { + en->variantsz *= 2; + size_t newsz = en->variantsz * sizeof(struct ast_enumvar); + en->variants = must_realloc(en->variants, newsz); + } + en->variants[en->variantlen++] = v; + } + + if (!match(lexer, T_RBRACE)) { + error(lex_loc(lexer), "expected '}'"); + } + + return 1; +} + +static int +parse_field(struct lexer *lexer, struct ast_field *f) +{ + f->loc = lex_loc(lexer); + + struct token nametok; + if (lex(lexer, &nametok) != T_NAME) { + unlex(lexer, &nametok); + return 0; + } + f->name = nametok.info.str; + + if (!parse_type(lexer, &f->type)) { + error(lex_loc(lexer), "expected field type"); + } + + return 1; +} + +static int +parse_struct(struct lexer *lexer, struct ast_struct *s) +{ + s->loc = lex_loc(lexer); + + if (!match(lexer, T_STRUCT)) { + return 0; + } + + struct token nametok; + if (lex(lexer, &nametok) != T_NAME) { + error(nametok.loc, "expected struct name"); + } + s->name = nametok.info.str; + + s->align = 0; + if (match(lexer, T_ALIGN)) { + struct token aligntok; + if (lex(lexer, &aligntok) != T_NUMBER) { + error(aligntok.loc, "expected alignment (integer literal)"); + } + if (aligntok.info.num.isfloat) { + error(aligntok.loc, "alignment must be an integer, not a float"); + } + s->align = aligntok.info.num.value.integer; + } + + s->ispub = match(lexer, T_PUB); + + if (!match(lexer, T_LBRACE)) { + error(lex_loc(lexer), "expected '{'"); } - path->segments[path->seglen++] = name; + s->fields = must_calloc(1, sizeof(struct ast_field)); + s->fieldsz = 1; + s->fieldlen = 0; + + struct ast_field f; + while (parse_field(lexer, &f)) { + if (!match(lexer, T_COMMA)) { + error(lex_loc(lexer), "expected (trailing) comma after struct field"); + } + + if (s->fieldlen >= s->fieldsz) { + s->fieldsz *= 2; + size_t newsz = s->fieldsz * sizeof(struct ast_field); + s->fields = must_realloc(s->fields, newsz); + } + s->fields[s->fieldlen++] = f; + } + + if (!match(lexer, T_RBRACE)) { + error(lex_loc(lexer), "expected '}'"); + } + + return 1; } -static bool -parse_path(struct lexer *lexer, struct path *out) +static int +parse_union(struct lexer *lexer, struct ast_union *un) { - struct token name; + un->loc = lex_loc(lexer); + + if (!match(lexer, T_UNION)) { + return 0; + } + + struct token nametok; + if (lex(lexer, &nametok) != T_NAME) { + error(nametok.loc, "expected union name"); + } + un->name = nametok.info.str; + + un->ispub = match(lexer, T_PUB); - if (lex(lexer, &name) != T_NAME) { - unlex(lexer, &name); - return false; + if (!match(lexer, T_LBRACE)) { + error(lex_loc(lexer), "expected '{'"); } - out->segments = must_calloc(1, sizeof(const char *)); - out->segments[0] = name.info.str; - out->seglen = 1; - out->segsz = 1; + un->types = must_calloc(1, sizeof(struct type)); + un->typesz = 1; + un->typelen = 0; - while (match(lexer, T_MODDELIM)) { - if (lex(lexer, &name) != T_NAME) { - error(name.loc, "syntax error: expected name"); + struct type type; + while (parse_type(lexer, &type)) { + if (!match(lexer, T_COMMA)) { + error(lex_loc(lexer), "expected (trailing) comma after union variant"); } - append(out, name.info.str); + + if (un->typelen >= un->typesz) { + un->typesz *= 2; + size_t newsz = un->typesz * sizeof(struct type); + un->types = must_realloc(un->types, newsz); + } + un->types[un->typelen++] = type; + } + + if (!match(lexer, T_RBRACE)) { + error(lex_loc(lexer), "expected '}'"); } - return true; + return 1; +} + +static int +parse_func(struct lexer *lexer, struct ast_func *fn) +{ + fn->loc = lex_loc(lexer); + + if (!match(lexer, T_FUNC)) { + return 0; + } + + fn->ispub = match(lexer, T_PUB); + fn->isextern = match(lexer, T_EXTERN); + + struct token nametok; + if (lex(lexer, &nametok) != T_NAME) { + error(nametok.loc, "expected function name"); + } + fn->name = nametok.info.str; + + if (!match(lexer, T_LPAREN)) { + error(lex_loc(lexer), "expected '('"); + } + + fn->params = must_calloc(1, sizeof(struct ast_field)); + fn->paramsz = 1; + fn->paramlen = 0; + + struct ast_field f; + while (parse_field(lexer, &f)) { + if (!match(lexer, T_COMMA)) { + error(lex_loc(lexer), "expected (trailing) comma after function parameter"); + } + + if (fn->paramlen >= fn->paramsz) { + fn->paramsz *= 2; + size_t newsz = fn->paramsz * sizeof(struct ast_field); + fn->params = must_realloc(fn->params, newsz); + } + fn->params[fn->paramlen++] = f; + } + + if (!match(lexer, T_RPAREN)) { + error(lex_loc(lexer), "expected ')'"); + } + + fn->ret = must_malloc(sizeof(struct type)); + if (!parse_type(lexer, fn->ret)) { + free(fn->ret); + fn->ret = NULL; + } + + if (!match(lexer, T_LBRACE)) { + error(lex_loc(lexer), "expected '{'"); + } + + fn->stmts = must_calloc(1, sizeof(struct ast_stmt)); + fn->stmtsz = 1; + fn->stmtlen = 0; + + struct ast_stmt stmt; + while (parse_stmt(lexer, &stmt)) { + if (fn->stmtlen >= fn->stmtsz) { + fn->stmtsz *= 2; + size_t newsz = fn->stmtsz * sizeof(struct ast_stmt); + fn->stmts = must_realloc(fn->stmts, newsz); + } + fn->stmts[fn->stmtlen++] = stmt; + } + + if (!match(lexer, T_RBRACE)) { + error(lex_loc(lexer), "expected '}'"); + } + + return 1; +} + +static int +parse_toplevel(struct lexer *lexer, struct ast_toplevel *tl) +{ + tl->loc = lex_loc(lexer); + + if (parse_include(lexer, &tl->tl.include)) { + tl->kind = TOP_INCLUDE; + } else if (parse_global(lexer, &tl->tl.globalvar)) { + tl->kind = TOP_GLOBAL; + } else if (parse_const(lexer, &tl->tl.constant)) { + tl->kind = TOP_CONST; + } else if (parse_enum(lexer, &tl->tl.enumconst)) { + tl->kind = TOP_ENUM; + } else if (parse_struct(lexer, &tl->tl.structdecl)) { + tl->kind = TOP_STRUCT; + } else if (parse_union(lexer, &tl->tl.uniondecl)) { + tl->kind = TOP_UNION; + } else if (parse_func(lexer, &tl->tl.function)) { + tl->kind = TOP_FUNC; + } else { + return 0; + } + + return 1; } void parse(struct lexer *lexer, struct ast_unit *u) { + u->tops = must_calloc(1, sizeof(struct ast_toplevel)); + u->topsz = 1; + u->toplen = 0; + + struct ast_toplevel tl; + while (parse_toplevel(lexer, &tl)) { + if (u->toplen >= u->topsz) { + u->topsz *= 2; + size_t newsz = u->topsz * sizeof(struct ast_toplevel); + u->tops = must_realloc(u->tops, newsz); + } + u->tops[u->toplen++] = tl; + } } |