about summary refs log tree commit diff
diff options
context:
space:
mode:
author Himbeer <himbeer@disroot.org> 2025-05-06 11:53:56 +0200
committer Himbeer <himbeer@disroot.org> 2025-05-06 11:53:56 +0200
commit 773d0876aca8ddcc3e37caca8a63f62249d0b3d9 (patch)
tree 5257f1491529bc62ed8c7dc0c0d8d9b3ba188c4f
parent 1d19bd1c66f8aa68848404a4b724dc3ca795b0da (diff)
Implement toplevel parsing
-rw-r--r-- include/util.h 5
-rw-r--r-- src/parse.c 394
2 files changed, 371 insertions, 28 deletions
diff --git a/include/util.h b/include/util.h
index 0e2b289..70ae619 100644
--- a/include/util.h
+++ b/include/util.h
@@ -11,11 +11,6 @@ enum exit_status {
EXIT_ABNORMAL = 255,
};
-struct path {
- const char **segments;
- int segsz, seglen;
-};
-
void *must_malloc(size_t size);
void *must_calloc(size_t nmemb, size_t size);
void *must_realloc(void *ptr, size_t size);
diff --git a/src/parse.c b/src/parse.c
index 3d01562..a9d99b0 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -10,7 +10,7 @@
static noreturn void
error(struct location loc, const char *fmt, ...)
{
- fprintf(stderr, "%s:%d:%d ", loc.file, loc.line, loc.column);
+ fprintf(stderr, "%s:%d:%d: ", loc.file, loc.line, loc.column);
va_list ap;
va_start(ap, fmt);
@@ -21,44 +21,392 @@ error(struct location loc, const char *fmt, ...)
exit(EXIT_PARSE);
}
-static void
-append(struct path *path, const char *name)
+static int
+parse_include(struct lexer *lexer, struct ast_include *inc)
{
- if (path->seglen >= path->segsz) {
- path->segsz *= 2;
- size_t sz = sizeof(const char *) * path->segsz;
- path->segments = must_realloc(path->segments, sz);
+ inc->loc = lex_loc(lexer);
+
+ if (!match(lexer, T_INCLUDE)) {
+ return 0;
+ }
+
+ struct token pathtok;
+ if (lex(lexer, &pathtok) != T_STRING) {
+ error(pathtok.loc, "expected string literal after 'include'");
+ }
+ inc->path = pathtok.info.str;
+
+ if (!match(lexer, T_SEMICOLON)) {
+ error(lex_loc(lexer), "expected semicolon");
+ }
+
+ return 1;
+}
+
+static int
+parse_global(struct lexer *lexer, struct ast_global *g)
+{
+ g->loc = lex_loc(lexer);
+
+ if (!match(lexer, T_VAR)) {
+ return 0;
+ }
+
+ struct token nametok;
+ if (lex(lexer, &nametok) != T_NAME) {
+ error(nametok.loc, "expected variable name after 'var'");
+ }
+ g->name = nametok.info.str;
+
+ g->ispub = match(lexer, T_PUB);
+ g->isextern = match(lexer, T_EXTERN);
+
+ if (!parse_type(lexer, &g->type)) {
+ error(lex_loc(lexer), "expected type");
+ }
+
+ if (!match(lexer, T_SEMICOLON)) {
+ error(lex_loc(lexer), "expected semicolon");
+ }
+
+ return 1;
+}
+
+static int
+parse_const(struct lexer *lexer, struct ast_const *c)
+{
+ c->loc = lex_loc(lexer);
+
+ if (!match(lexer, T_CONST)) {
+ return 0;
+ }
+
+ struct token nametok;
+ if (lex(lexer, &nametok) != T_NAME) {
+ error(nametok.loc, "expected constant name after 'const'");
+ }
+ c->name = nametok.info.str;
+
+ c->ispub = match(lexer, T_PUB);
+
+ if (!parse_expr(lexer, &c->value)) {
+ error(lex_loc(lexer), "expected expression");
+ }
+
+ if (!match(lexer, T_SEMICOLON)) {
+ error(lex_loc(lexer), "expected semicolon");
+ }
+
+ return 1;
+}
+
+static int
+parse_enumvar(struct lexer *lexer, struct ast_enumvar *ev)
+{
+ ev->loc = lex_loc(lexer);
+
+ struct token nametok;
+ if (lex(lexer, &nametok) != T_NAME) {
+ unlex(lexer, &nametok);
+ return 0;
+ }
+ ev->name = nametok.info.str;
+
+ ev->override = NULL;
+ if (!match(lexer, T_ASSIGN)) {
+ return 1;
+ }
+ ev->override = must_malloc(sizeof(struct expr));
+
+ if (!parse_expr(lexer, ev->override)) {
+ error(lex_loc(lexer), "expected expression after '=' in enum variant");
+ }
+
+ return 1;
+}
+
+static int
+parse_enum(struct lexer *lexer, struct ast_enum *en)
+{
+ en->loc = lex_loc(lexer);
+
+ if (!match(lexer, T_ENUM)) {
+ return 0;
+ }
+
+ if (!parse_expr(lexer, &en->init)) {
+ error(lex_loc(lexer), "expected default initialization expression");
+ }
+
+ en->ispub = match(lexer, T_PUB);
+
+ if (!match(lexer, T_LBRACE)) {
+ error(lex_loc(lexer), "expected '{'");
+ }
+
+ en->variants = must_calloc(1, sizeof(struct ast_enumvar));
+ en->variantsz = 1;
+ en->variantlen = 0;
+
+ struct ast_enumvar v;
+ while (parse_enumvar(lexer, &v)) {
+ if (!match(lexer, T_COMMA)) {
+ error(lex_loc(lexer), "expected (trailing) comma after enum variant");
+ }
+
+ if (en->variantlen >= en->variantsz) {
+ en->variantsz *= 2;
+ size_t newsz = en->variantsz * sizeof(struct ast_enumvar);
+ en->variants = must_realloc(en->variants, newsz);
+ }
+ en->variants[en->variantlen++] = v;
+ }
+
+ if (!match(lexer, T_RBRACE)) {
+ error(lex_loc(lexer), "expected '}'");
+ }
+
+ return 1;
+}
+
+static int
+parse_field(struct lexer *lexer, struct ast_field *f)
+{
+ f->loc = lex_loc(lexer);
+
+ struct token nametok;
+ if (lex(lexer, &nametok) != T_NAME) {
+ unlex(lexer, &nametok);
+ return 0;
+ }
+ f->name = nametok.info.str;
+
+ if (!parse_type(lexer, &f->type)) {
+ error(lex_loc(lexer), "expected field type");
+ }
+
+ return 1;
+}
+
+static int
+parse_struct(struct lexer *lexer, struct ast_struct *s)
+{
+ s->loc = lex_loc(lexer);
+
+ if (!match(lexer, T_STRUCT)) {
+ return 0;
+ }
+
+ struct token nametok;
+ if (lex(lexer, &nametok) != T_NAME) {
+ error(nametok.loc, "expected struct name");
+ }
+ s->name = nametok.info.str;
+
+ s->align = 0;
+ if (match(lexer, T_ALIGN)) {
+ struct token aligntok;
+ if (lex(lexer, &aligntok) != T_NUMBER) {
+ error(aligntok.loc, "expected alignment (integer literal)");
+ }
+ if (aligntok.info.num.isfloat) {
+ error(aligntok.loc, "alignment must be an integer, not a float");
+ }
+ s->align = aligntok.info.num.value.integer;
+ }
+
+ s->ispub = match(lexer, T_PUB);
+
+ if (!match(lexer, T_LBRACE)) {
+ error(lex_loc(lexer), "expected '{'");
}
- path->segments[path->seglen++] = name;
+ s->fields = must_calloc(1, sizeof(struct ast_field));
+ s->fieldsz = 1;
+ s->fieldlen = 0;
+
+ struct ast_field f;
+ while (parse_field(lexer, &f)) {
+ if (!match(lexer, T_COMMA)) {
+ error(lex_loc(lexer), "expected (trailing) comma after struct field");
+ }
+
+ if (s->fieldlen >= s->fieldsz) {
+ s->fieldsz *= 2;
+ size_t newsz = s->fieldsz * sizeof(struct ast_field);
+ s->fields = must_realloc(s->fields, newsz);
+ }
+ s->fields[s->fieldlen++] = f;
+ }
+
+ if (!match(lexer, T_RBRACE)) {
+ error(lex_loc(lexer), "expected '}'");
+ }
+
+ return 1;
}
-static bool
-parse_path(struct lexer *lexer, struct path *out)
+static int
+parse_union(struct lexer *lexer, struct ast_union *un)
{
- struct token name;
+ un->loc = lex_loc(lexer);
+
+ if (!match(lexer, T_UNION)) {
+ return 0;
+ }
+
+ struct token nametok;
+ if (lex(lexer, &nametok) != T_NAME) {
+ error(nametok.loc, "expected union name");
+ }
+ un->name = nametok.info.str;
+
+ un->ispub = match(lexer, T_PUB);
- if (lex(lexer, &name) != T_NAME) {
- unlex(lexer, &name);
- return false;
+ if (!match(lexer, T_LBRACE)) {
+ error(lex_loc(lexer), "expected '{'");
}
- out->segments = must_calloc(1, sizeof(const char *));
- out->segments[0] = name.info.str;
- out->seglen = 1;
- out->segsz = 1;
+ un->types = must_calloc(1, sizeof(struct type));
+ un->typesz = 1;
+ un->typelen = 0;
- while (match(lexer, T_MODDELIM)) {
- if (lex(lexer, &name) != T_NAME) {
- error(name.loc, "syntax error: expected name");
+ struct type type;
+ while (parse_type(lexer, &type)) {
+ if (!match(lexer, T_COMMA)) {
+ error(lex_loc(lexer), "expected (trailing) comma after union variant");
}
- append(out, name.info.str);
+
+ if (un->typelen >= un->typesz) {
+ un->typesz *= 2;
+ size_t newsz = un->typesz * sizeof(struct type);
+ un->types = must_realloc(un->types, newsz);
+ }
+ un->types[un->typelen++] = type;
+ }
+
+ if (!match(lexer, T_RBRACE)) {
+ error(lex_loc(lexer), "expected '}'");
}
- return true;
+ return 1;
+}
+
+static int
+parse_func(struct lexer *lexer, struct ast_func *fn)
+{
+ fn->loc = lex_loc(lexer);
+
+ if (!match(lexer, T_FUNC)) {
+ return 0;
+ }
+
+ fn->ispub = match(lexer, T_PUB);
+ fn->isextern = match(lexer, T_EXTERN);
+
+ struct token nametok;
+ if (lex(lexer, &nametok) != T_NAME) {
+ error(nametok.loc, "expected function name");
+ }
+ fn->name = nametok.info.str;
+
+ if (!match(lexer, T_LPAREN)) {
+ error(lex_loc(lexer), "expected '('");
+ }
+
+ fn->params = must_calloc(1, sizeof(struct ast_field));
+ fn->paramsz = 1;
+ fn->paramlen = 0;
+
+ struct ast_field f;
+ while (parse_field(lexer, &f)) {
+ if (!match(lexer, T_COMMA)) {
+ error(lex_loc(lexer), "expected (trailing) comma after function parameter");
+ }
+
+ if (fn->paramlen >= fn->paramsz) {
+ fn->paramsz *= 2;
+ size_t newsz = fn->paramsz * sizeof(struct ast_field);
+ fn->params = must_realloc(fn->params, newsz);
+ }
+ fn->params[fn->paramlen++] = f;
+ }
+
+ if (!match(lexer, T_RPAREN)) {
+ error(lex_loc(lexer), "expected ')'");
+ }
+
+ fn->ret = must_malloc(sizeof(struct type));
+ if (!parse_type(lexer, fn->ret)) {
+ free(fn->ret);
+ fn->ret = NULL;
+ }
+
+ if (!match(lexer, T_LBRACE)) {
+ error(lex_loc(lexer), "expected '{'");
+ }
+
+ fn->stmts = must_calloc(1, sizeof(struct ast_stmt));
+ fn->stmtsz = 1;
+ fn->stmtlen = 0;
+
+ struct ast_stmt stmt;
+ while (parse_stmt(lexer, &stmt)) {
+ if (fn->stmtlen >= fn->stmtsz) {
+ fn->stmtsz *= 2;
+ size_t newsz = fn->stmtsz * sizeof(struct ast_stmt);
+ fn->stmts = must_realloc(fn->stmts, newsz);
+ }
+ fn->stmts[fn->stmtlen++] = stmt;
+ }
+
+ if (!match(lexer, T_RBRACE)) {
+ error(lex_loc(lexer), "expected '}'");
+ }
+
+ return 1;
+}
+
+static int
+parse_toplevel(struct lexer *lexer, struct ast_toplevel *tl)
+{
+ tl->loc = lex_loc(lexer);
+
+ if (parse_include(lexer, &tl->tl.include)) {
+ tl->kind = TOP_INCLUDE;
+ } else if (parse_global(lexer, &tl->tl.globalvar)) {
+ tl->kind = TOP_GLOBAL;
+ } else if (parse_const(lexer, &tl->tl.constant)) {
+ tl->kind = TOP_CONST;
+ } else if (parse_enum(lexer, &tl->tl.enumconst)) {
+ tl->kind = TOP_ENUM;
+ } else if (parse_struct(lexer, &tl->tl.structdecl)) {
+ tl->kind = TOP_STRUCT;
+ } else if (parse_union(lexer, &tl->tl.uniondecl)) {
+ tl->kind = TOP_UNION;
+ } else if (parse_func(lexer, &tl->tl.function)) {
+ tl->kind = TOP_FUNC;
+ } else {
+ return 0;
+ }
+
+ return 1;
}
void
parse(struct lexer *lexer, struct ast_unit *u)
{
+ u->tops = must_calloc(1, sizeof(struct ast_toplevel));
+ u->topsz = 1;
+ u->toplen = 0;
+
+ struct ast_toplevel tl;
+ while (parse_toplevel(lexer, &tl)) {
+ if (u->toplen >= u->topsz) {
+ u->topsz *= 2;
+ size_t newsz = u->topsz * sizeof(struct ast_toplevel);
+ u->tops = must_realloc(u->tops, newsz);
+ }
+ u->tops[u->toplen++] = tl;
+ }
}