#define _POSIX_C_SOURCE 200809L #include #include #include #include #include "parse.h" #include "util.h" static noreturn void error(struct location loc, const char *fmt, ...) { fprintf(stderr, "%s:%d:%d: ", loc.file, loc.line, loc.column); va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "\n"); exit(EXIT_PARSE); } static int parse_include(struct lexer *lexer, struct ast_include *inc) { inc->loc = lex_loc(lexer); if (!match(lexer, T_INCLUDE)) { return 0; } struct token pathtok; if (lex(lexer, &pathtok) != T_STRING) { error(pathtok.loc, "expected string literal after 'include'"); } inc->path = pathtok.info.str; if (!match(lexer, T_SEMICOLON)) { error(lex_loc(lexer), "expected semicolon"); } return 1; } static int parse_global(struct lexer *lexer, struct ast_global *g) { g->loc = lex_loc(lexer); if (!match(lexer, T_VAR)) { return 0; } struct token nametok; if (lex(lexer, &nametok) != T_NAME) { error(nametok.loc, "expected variable name after 'var'"); } g->name = nametok.info.str; g->ispub = match(lexer, T_PUB); g->isextern = match(lexer, T_EXTERN); if (!parse_type(lexer, &g->type)) { error(lex_loc(lexer), "expected type"); } if (!match(lexer, T_SEMICOLON)) { error(lex_loc(lexer), "expected semicolon"); } return 1; } static int parse_const(struct lexer *lexer, struct ast_const *c) { c->loc = lex_loc(lexer); if (!match(lexer, T_CONST)) { return 0; } struct token nametok; if (lex(lexer, &nametok) != T_NAME) { error(nametok.loc, "expected constant name after 'const'"); } c->name = nametok.info.str; c->ispub = match(lexer, T_PUB); if (!parse_expr(lexer, &c->value)) { error(lex_loc(lexer), "expected expression"); } if (!match(lexer, T_SEMICOLON)) { error(lex_loc(lexer), "expected semicolon"); } return 1; } static int parse_enumvar(struct lexer *lexer, struct ast_enumvar *ev) { ev->loc = lex_loc(lexer); struct token nametok; if (lex(lexer, &nametok) != T_NAME) { unlex(lexer, &nametok); return 0; } ev->name = nametok.info.str; ev->override = NULL; if (!match(lexer, T_ASSIGN)) { return 1; } ev->override = must_malloc(sizeof(struct expr)); if (!parse_expr(lexer, ev->override)) { error(lex_loc(lexer), "expected expression after '=' in enum variant"); } return 1; } static int parse_enum(struct lexer *lexer, struct ast_enum *en) { en->loc = lex_loc(lexer); if (!match(lexer, T_ENUM)) { return 0; } if (!parse_expr(lexer, &en->init)) { error(lex_loc(lexer), "expected default initialization expression"); } en->ispub = match(lexer, T_PUB); if (!match(lexer, T_LBRACE)) { error(lex_loc(lexer), "expected '{'"); } en->variants = must_calloc(1, sizeof(struct ast_enumvar)); en->variantsz = 1; en->variantlen = 0; struct ast_enumvar v; while (parse_enumvar(lexer, &v)) { if (!match(lexer, T_COMMA)) { error(lex_loc(lexer), "expected (trailing) comma after enum variant"); } if (en->variantlen >= en->variantsz) { en->variantsz *= 2; size_t newsz = en->variantsz * sizeof(struct ast_enumvar); en->variants = must_realloc(en->variants, newsz); } en->variants[en->variantlen++] = v; } if (!match(lexer, T_RBRACE)) { error(lex_loc(lexer), "expected '}'"); } return 1; } static int parse_field(struct lexer *lexer, struct ast_field *f) { f->loc = lex_loc(lexer); struct token nametok; if (lex(lexer, &nametok) != T_NAME) { unlex(lexer, &nametok); return 0; } f->name = nametok.info.str; if (!parse_type(lexer, &f->type)) { error(lex_loc(lexer), "expected field type"); } return 1; } static int parse_struct(struct lexer *lexer, struct ast_struct *s) { s->loc = lex_loc(lexer); if (!match(lexer, T_STRUCT)) { return 0; } struct token nametok; if (lex(lexer, &nametok) != T_NAME) { error(nametok.loc, "expected struct name"); } s->name = nametok.info.str; s->align = 0; if (match(lexer, T_ALIGN)) { struct token aligntok; if (lex(lexer, &aligntok) != T_NUMBER) { error(aligntok.loc, "expected alignment (integer literal)"); } if (aligntok.info.num.isfloat) { error(aligntok.loc, "alignment must be an integer, not a float"); } s->align = aligntok.info.num.value.integer; } s->ispub = match(lexer, T_PUB); if (!match(lexer, T_LBRACE)) { error(lex_loc(lexer), "expected '{'"); } s->fields = must_calloc(1, sizeof(struct ast_field)); s->fieldsz = 1; s->fieldlen = 0; struct ast_field f; while (parse_field(lexer, &f)) { if (!match(lexer, T_COMMA)) { error(lex_loc(lexer), "expected (trailing) comma after struct field"); } if (s->fieldlen >= s->fieldsz) { s->fieldsz *= 2; size_t newsz = s->fieldsz * sizeof(struct ast_field); s->fields = must_realloc(s->fields, newsz); } s->fields[s->fieldlen++] = f; } if (!match(lexer, T_RBRACE)) { error(lex_loc(lexer), "expected '}'"); } return 1; } static int parse_union(struct lexer *lexer, struct ast_union *un) { un->loc = lex_loc(lexer); if (!match(lexer, T_UNION)) { return 0; } struct token nametok; if (lex(lexer, &nametok) != T_NAME) { error(nametok.loc, "expected union name"); } un->name = nametok.info.str; un->ispub = match(lexer, T_PUB); if (!match(lexer, T_LBRACE)) { error(lex_loc(lexer), "expected '{'"); } un->types = must_calloc(1, sizeof(struct type)); un->typesz = 1; un->typelen = 0; struct type type; while (parse_type(lexer, &type)) { if (!match(lexer, T_COMMA)) { error(lex_loc(lexer), "expected (trailing) comma after union variant"); } if (un->typelen >= un->typesz) { un->typesz *= 2; size_t newsz = un->typesz * sizeof(struct type); un->types = must_realloc(un->types, newsz); } un->types[un->typelen++] = type; } if (!match(lexer, T_RBRACE)) { error(lex_loc(lexer), "expected '}'"); } return 1; } static int parse_func(struct lexer *lexer, struct ast_func *fn) { fn->loc = lex_loc(lexer); if (!match(lexer, T_FUNC)) { return 0; } fn->ispub = match(lexer, T_PUB); fn->isextern = match(lexer, T_EXTERN); struct token nametok; if (lex(lexer, &nametok) != T_NAME) { error(nametok.loc, "expected function name"); } fn->name = nametok.info.str; if (!match(lexer, T_LPAREN)) { error(lex_loc(lexer), "expected '('"); } fn->params = must_calloc(1, sizeof(struct ast_field)); fn->paramsz = 1; fn->paramlen = 0; struct ast_field f; while (parse_field(lexer, &f)) { if (!match(lexer, T_COMMA)) { error(lex_loc(lexer), "expected (trailing) comma after function parameter"); } if (fn->paramlen >= fn->paramsz) { fn->paramsz *= 2; size_t newsz = fn->paramsz * sizeof(struct ast_field); fn->params = must_realloc(fn->params, newsz); } fn->params[fn->paramlen++] = f; } if (!match(lexer, T_RPAREN)) { error(lex_loc(lexer), "expected ')'"); } fn->ret = must_malloc(sizeof(struct type)); if (!parse_type(lexer, fn->ret)) { free(fn->ret); fn->ret = NULL; } if (!match(lexer, T_LBRACE)) { error(lex_loc(lexer), "expected '{'"); } fn->stmts = must_calloc(1, sizeof(struct ast_stmt)); fn->stmtsz = 1; fn->stmtlen = 0; struct ast_stmt stmt; while (parse_stmt(lexer, &stmt)) { if (fn->stmtlen >= fn->stmtsz) { fn->stmtsz *= 2; size_t newsz = fn->stmtsz * sizeof(struct ast_stmt); fn->stmts = must_realloc(fn->stmts, newsz); } fn->stmts[fn->stmtlen++] = stmt; } if (!match(lexer, T_RBRACE)) { error(lex_loc(lexer), "expected '}'"); } return 1; } static int parse_toplevel(struct lexer *lexer, struct ast_toplevel *tl) { tl->loc = lex_loc(lexer); if (parse_include(lexer, &tl->tl.include)) { tl->kind = TOP_INCLUDE; } else if (parse_global(lexer, &tl->tl.globalvar)) { tl->kind = TOP_GLOBAL; } else if (parse_const(lexer, &tl->tl.constant)) { tl->kind = TOP_CONST; } else if (parse_enum(lexer, &tl->tl.enumconst)) { tl->kind = TOP_ENUM; } else if (parse_struct(lexer, &tl->tl.structdecl)) { tl->kind = TOP_STRUCT; } else if (parse_union(lexer, &tl->tl.uniondecl)) { tl->kind = TOP_UNION; } else if (parse_func(lexer, &tl->tl.function)) { tl->kind = TOP_FUNC; } else { return 0; } return 1; } void parse(struct lexer *lexer, struct ast_unit *u) { u->tops = must_calloc(1, sizeof(struct ast_toplevel)); u->topsz = 1; u->toplen = 0; struct ast_toplevel tl; while (parse_toplevel(lexer, &tl)) { if (u->toplen >= u->topsz) { u->topsz *= 2; size_t newsz = u->topsz * sizeof(struct ast_toplevel); u->tops = must_realloc(u->tops, newsz); } u->tops[u->toplen++] = tl; } }