diff options
author | Himbeer <himbeer@disroot.org> | 2024-09-26 14:13:21 +0200 |
---|---|---|
committer | Himbeer <himbeer@disroot.org> | 2024-09-26 14:13:21 +0200 |
commit | 8ea5034f93cc0d06cdad8bcefc02a57d9e562146 (patch) | |
tree | 42a2bf4920a4d415b6ab779f8fc1ed06e525bbb7 | |
parent | 91730d20c1c2befe5ccb52f63f2d4200393ae291 (diff) |
Implement type parsing
-rw-r--r-- | include/expr.h | 5 | ||||
-rw-r--r-- | include/parse.h | 15 | ||||
-rw-r--r-- | include/type.h | 24 | ||||
-rw-r--r-- | include/util.h | 7 | ||||
-rw-r--r-- | src/parse.c | 605 |
5 files changed, 635 insertions, 21 deletions
diff --git a/include/expr.h b/include/expr.h index 8b4d8b0..5b0ce59 100644 --- a/include/expr.h +++ b/include/expr.h @@ -32,11 +32,6 @@ struct literal_e { } lit; }; -struct path { - const char **segments; - int segsz, seglen; -}; - struct call_e { struct path path; struct disjunction_e *args; diff --git a/include/parse.h b/include/parse.h index de30f39..981d8c6 100644 --- a/include/parse.h +++ b/include/parse.h @@ -9,7 +9,8 @@ struct ast_expr { struct ast_externfunc { const char *name; struct type ret; - struct type params[]; + struct type *params; + int paramsz, paramlen; }; struct ast_param { @@ -24,11 +25,12 @@ struct ast_block { }; struct ast_func { - bool pub, exported; + bool exported; const char *name; - struct ast_block block; struct type ret; - struct ast_param params[]; + struct ast_param *params; + int paramsz, paramlen; + struct ast_block block; }; enum const_global { @@ -39,14 +41,13 @@ enum const_global { }; struct ast_const_global { - bool pub; const char *name; enum const_global kind; union { - struct type *type; + struct type type; bool b; struct number num; - const char *str; + char *str; } value; }; diff --git a/include/type.h b/include/type.h index 78a3d18..c8a6b76 100644 --- a/include/type.h +++ b/include/type.h @@ -2,6 +2,12 @@ #define CERC_TYPE_H #include <stdbool.h> #include <stdint.h> +#include "expr.h" +#include "lex.h" +#include "util.h" + +// Only 64-bit targets are supported +#define PLATBITS 64 struct cer_int { bool sign; @@ -14,11 +20,7 @@ struct cer_float { struct cer_array { struct type *member_type; - int length; -}; - -struct cer_slice { - struct type *member_type; + struct disjunction_e *length; }; struct field { @@ -28,6 +30,7 @@ struct field { struct cer_struct { struct field *fields; + int fieldsz, fieldlen; }; struct variant { @@ -36,12 +39,15 @@ struct variant { }; struct cer_enum { - struct cer_int tag_type; - struct variant variants[]; + struct type *tag_type; + struct variant *variants; + int variantsz, variantlen; }; 
struct cer_union { + struct type *tag_type; struct type *alts; + int altsz, altlen; }; enum type_kind { @@ -52,6 +58,7 @@ enum type_kind { TYP_FLOAT, TYP_INT, TYP_OPTIONAL, + TYP_PATH, TYP_POINTER, TYP_SLICE, TYP_STRUCT, @@ -62,10 +69,11 @@ union type_desc { struct cer_int i; struct cer_float f; struct cer_array array; - struct cer_slice slice; struct cer_struct s; struct cer_enum en; struct cer_union un; + struct type *inner; + struct path path; }; struct type { diff --git a/include/util.h b/include/util.h index 5a5a819..0e2b289 100644 --- a/include/util.h +++ b/include/util.h @@ -1,5 +1,7 @@ #ifndef CERC_UTIL_H #define CERC_UTIL_H +#include <stddef.h> + enum exit_status { /* EXIT_SUCCESS = 0 (defined in stdlib.h) */ EXIT_USER = 1, @@ -9,6 +11,11 @@ enum exit_status { EXIT_ABNORMAL = 255, }; +struct path { + const char **segments; + int segsz, seglen; +}; + void *must_malloc(size_t size); void *must_calloc(size_t nmemb, size_t size); void *must_realloc(void *ptr, size_t size); diff --git a/src/parse.c b/src/parse.c index 8789d37..b235b24 100644 --- a/src/parse.c +++ b/src/parse.c @@ -9,6 +9,7 @@ static bool parse_disjunction_e(struct lexer *lexer, struct disjunction_e *out); +static bool parse_type(struct lexer *lexer, struct type *out); static noreturn void error(struct location loc, const char *fmt, ...) 
@@ -42,7 +43,8 @@ parse_path(struct lexer *lexer, struct path *out) struct token name; if (lex(lexer, &name) != T_NAME) { - error(name.loc, "syntax error: expected path"); + unlex(lexer, &name); + return false; } out->segments = must_calloc(1, sizeof(const char *)); @@ -584,12 +586,603 @@ parse_import(struct lexer *lexer, struct ast_import *out) return true; } +static bool +parse_integer(struct lexer *lexer, struct type *out) +{ + struct token token; + switch(lex(lexer, &token)) { + case T_INT8: + out->kind = TYP_INT; + out->desc.i.sign = true; + out->desc.i.bits = 8; + return true; + case T_UINT8: + out->kind = TYP_INT; + out->desc.i.sign = false; + out->desc.i.bits = 8; + return true; + case T_INT16: + out->kind = TYP_INT; + out->desc.i.sign = true; + out->desc.i.bits = 16; + return true; + case T_UINT16: + out->kind = TYP_INT; + out->desc.i.sign = false; + out->desc.i.bits = 16; + return true; + case T_INT32: + out->kind = TYP_INT; + out->desc.i.sign = true; + out->desc.i.bits = 32; + return true; + case T_UINT32: + out->kind = TYP_INT; + out->desc.i.sign = false; + out->desc.i.bits = 32; + return true; + case T_INT64: + out->kind = TYP_INT; + out->desc.i.sign = true; + out->desc.i.bits = 64; + return true; + case T_UINT64: + out->kind = TYP_INT; + out->desc.i.sign = false; + out->desc.i.bits = 64; + return true; + case T_INT: + out->kind = TYP_INT; + out->desc.i.sign = true; + out->desc.i.bits = PLATBITS; + return true; + case T_UINT: + out->kind = TYP_INT; + out->desc.i.sign = false; + out->desc.i.bits = PLATBITS; + return true; + default: + unlex(lexer, &token); + return false; + } +} + +static bool +parse_float(struct lexer *lexer, struct type *out) +{ + struct token token; + switch (lex(lexer, &token)) { + case T_FLOAT32: + out->kind = TYP_FLOAT; + out->desc.f.is64 = false; + return true; + case T_FLOAT64: + out->kind = TYP_FLOAT; + out->desc.f.is64 = true; + default: + unlex(lexer, &token); + return false; + } +} + +static bool +parse_field(struct lexer 
*lexer, struct field *out) +{ + struct token name; + if (lex(lexer, &name) != T_NAME) { + return false; + } + out->name = name.info.str; + + struct type *type = must_malloc(sizeof(struct type)); + if (!parse_type(lexer, type)) { + error(lex_loc(lexer), "syntax error: expected type"); + } + out->type = type; + + return true; +} + +static bool +parse_struct(struct lexer *lexer, struct type *out) +{ + if (!match(lexer, T_STRUCT)) { + return false; + } + if (!match(lexer, T_LBRACE)) { + error(lex_loc(lexer), "syntax error: expected '{'"); + } + + out->kind = TYP_STRUCT; + out->desc.s.fields = must_calloc(1, sizeof(struct field)); + out->desc.s.fieldsz = 1; + out->desc.s.fieldlen = 0; + + struct field field; + while (parse_field(lexer, &field)) { + if (out->desc.s.fieldlen >= out->desc.s.fieldsz) { + out->desc.s.fieldsz *= 2; + size_t sz = sizeof(struct field) * out->desc.s.fieldsz; + out->desc.s.fields = must_realloc(out->desc.s.fields, + sz); + } + out->desc.s.fields[out->desc.s.fieldlen++] = field; + + if (!match(lexer, T_COMMA)) { + break; + } + } + + if (!match(lexer, T_RBRACE)) { + error(lex_loc(lexer), "syntax error: expected '}'"); + } + + return true; +} + +static bool +parse_enum(struct lexer *lexer, struct type *out) +{ + if (!match(lexer, T_ENUM)) { + return false; + } + + out->kind = TYP_ENUM; + // Default tag type is uint + out->desc.en.tag_type = malloc(sizeof(struct type)); + out->desc.en.tag_type->kind = TYP_INT; + out->desc.en.tag_type->desc.i.sign = false; + out->desc.en.tag_type->desc.i.bits = PLATBITS; + + if (match(lexer, T_LPAREN)) { + if (!parse_type(lexer, out->desc.en.tag_type)) { + error(lex_loc(lexer), + "syntax error: expected tag type"); + } + + if (!match(lexer, T_RPAREN)) { + error(lex_loc(lexer), "syntax error: expected ')'"); + } + } + + struct disjunction_e init; + if (match(lexer, T_ASSIGN)) { + if (!parse_disjunction_e(lexer, &init)) { + error(lex_loc(lexer), + "syntax error: expected expression"); + } + } + + if (!match(lexer, 
T_LBRACE)) { + error(lex_loc(lexer), "syntax error: expected '{'"); + } + + out->desc.en.variants = must_calloc(1, sizeof(struct variant)); + out->desc.en.variantsz = 1; + out->desc.en.variantlen = 0; + + struct token name; + uint64_t tag = 0; + bool commabreak = false; + while (lex(lexer, &name) == T_NAME) { + if (out->desc.en.variantlen >= out->desc.en.variantsz) { + out->desc.en.variantsz *= 2; + size_t sz = sizeof(struct variant) * + out->desc.en.variantsz; + out->desc.en.variants = must_realloc( + out->desc.en.variants, sz); + } + out->desc.en.variants[out->desc.en.variantlen].name = + name.info.str; + out->desc.en.variants[out->desc.en.variantlen++].tag = tag++; + + if (!match(lexer, T_COMMA)) { + commabreak = true; + break; + } + } + if (!commabreak) { + unlex(lexer, &name); + } + + if (!match(lexer, T_RBRACE)) { + error(lex_loc(lexer), "syntax error: expected '}'"); + } + + return true; +} + +static bool parse_union(struct lexer *lexer, struct type *out) +{ + if (!match(lexer, T_UNION)) { + return false; + } + if (!match(lexer, T_LBRACE)) { + error(lex_loc(lexer), "syntax error: expected '{'"); + } + + out->kind = TYP_UNION; + out->desc.un.tag_type = NULL; + out->desc.un.alts = must_calloc(1, sizeof(struct type)); + out->desc.un.altsz = 1; + out->desc.un.altlen = 0; + + struct type alt; + while (parse_type(lexer, &alt)) { + if (out->desc.un.altlen >= out->desc.un.altsz) { + out->desc.un.altsz *= 2; + size_t sz = sizeof(struct type) * out->desc.un.altsz; + out->desc.un.alts = must_realloc(out->desc.un.alts, sz); + } + out->desc.un.alts[out->desc.un.altlen++] = alt; + + if (!match(lexer, T_COMMA)) { + break; + } + } + + if (!match(lexer, T_RBRACE)) { + error(lex_loc(lexer), "syntax error: expected '}'"); + } + + return true; +} + +static bool +parse_array(struct lexer *lexer, struct type *out) +{ + if (!match(lexer, T_LBRACKET)) { + return false; + } + + out->kind = TYP_ARRAY; + + out->desc.array.length = must_malloc(sizeof(struct disjunction_e)); + if 
(!parse_disjunction_e(lexer, out->desc.array.length)) { + out->kind = TYP_SLICE; + } + + if (!match(lexer, T_RBRACKET)) { + error(lex_loc(lexer), "syntax error: expected ']'"); + } + + struct type *type = must_malloc(sizeof(struct type)); + if (!parse_type(lexer, type)) { + error(lex_loc(lexer), "syntax error: expected type"); + } + out->desc.array.member_type = type; + + return true; +} + +static bool +parse_type(struct lexer *lexer, struct type *out) +{ + struct token token; + switch (lex(lexer, &token)) { + case T_STAR: + out->kind = TYP_POINTER; + out->desc.inner = must_malloc(sizeof(struct type)); + return parse_type(lexer, out->desc.inner); + case T_QUESTION: + out->kind = TYP_OPTIONAL; + out->desc.inner = must_malloc(sizeof(struct type)); + return parse_type(lexer, out->desc.inner); + case T_BANG: + out->kind = TYP_FALLIBLE; + out->desc.inner = must_malloc(sizeof(struct type)); + return parse_type(lexer, out->desc.inner); + case T_BOOL: + out->kind = TYP_BOOL; + return true; + } + unlex(lexer, &token); + + if (parse_integer(lexer, out)) { + return true; + } + if (parse_float(lexer, out)) { + return true; + } + if (parse_struct(lexer, out)) { + return true; + } + if (parse_enum(lexer, out)) { + return true; + } + if (parse_union(lexer, out)) { + return true; + } + if (parse_array(lexer, out)) { + return true; + } + + struct path path; + if (parse_path(lexer, &path)) { + out->kind = TYP_PATH; + out->desc.path = path; + return true; + } + + return false; +} + +static bool +parse_externfunc(struct lexer *lexer, struct ast_externfunc *out) +{ + out->params = must_calloc(1, sizeof(struct type)); + out->paramsz = 1; + out->paramlen = 0; + + if (!match(lexer, T_EXTERN)) { + return false; + } + if (!match(lexer, T_FUNC)) { + error(lex_loc(lexer), "syntax error: expected keyword 'func'"); + } + + struct token name; + if (lex(lexer, &name) != T_NAME) { + error(lex_loc(lexer), "syntax error: expected name"); + } + out->name = name.info.str; + + if (!match(lexer, 
T_LPAREN)) { + error(lex_loc(lexer), "syntax error: expected '('"); + } + + struct type paramtype; + while (parse_type(lexer, &paramtype)) { + if (out->paramlen >= out->paramsz) { + out->paramsz *= 2; + size_t sz = sizeof(struct type) * out->paramsz; + out->params = must_realloc(out->params, sz); + } + out->params[out->paramlen++] = paramtype; + + if (!match(lexer, T_COMMA)) { + break; + } + } + + if (!match(lexer, T_RPAREN)) { + error(lex_loc(lexer), "syntax error: expected ')'"); + } + + bool nonvoid = parse_type(lexer, &out->ret); + + if (!match(lexer, T_SEMICOLON)) { + error(lex_loc(lexer), "syntax error: expected semicolon"); + } + + return true; +} + +static bool +parse_block(struct lexer *lexer, struct ast_block *out) +{ + if (!match(lexer, T_LBRACE)) { + return false; + } + + /* TODO */ + + if (!match(lexer, T_RBRACE)) { + error(lex_loc(lexer), "syntax error: expected '}'"); + } + + return true; +} + +static bool +parse_param(struct lexer *lexer, struct ast_param *out) +{ + struct token name; + if (lex(lexer, &name) != T_NAME) { + unlex(lexer, &name); + return false; + } + out->name = name.info.str; + + if (!parse_type(lexer, &out->type)) { + error(lex_loc(lexer), "syntax error: expected type"); + } + + return true; +} + +static bool +parse_function(struct lexer *lexer, struct ast_func *out) +{ + out->params = must_calloc(1, sizeof(struct ast_param)); + out->paramsz = 1; + out->paramlen = 0; + + out->exported = match(lexer, T_EXPORT); + + if (!match(lexer, T_FUNC)) { + if (out->exported) { + error(lex_loc(lexer), + "syntax error: expected keyword 'func'"); + } + return false; + } + + struct token name; + if (lex(lexer, &name) != T_NAME) { + error(lex_loc(lexer), "syntax error: expected name"); + } + out->name = name.info.str; + + if (!match(lexer, T_LPAREN)) { + error(lex_loc(lexer), "syntax error: expected '('"); + } + + struct ast_param param; + while (parse_param(lexer, &param)) { + if (out->paramlen >= out->paramsz) { + out->paramsz *= 2; + size_t sz = 
sizeof(struct ast_param) * out->paramsz; + out->params = must_realloc(out->params, sz); + } + out->params[out->paramlen++] = param; + + if (!match(lexer, T_COMMA)) { + break; + } + } + + if (!match(lexer, T_RPAREN)) { + error(lex_loc(lexer), "syntax error: expected ')'"); + } + + bool nonvoid = parse_type(lexer, &out->ret); + + if (!parse_block(lexer, &out->block)) { + error(lex_loc(lexer), "syntax error: expected block"); + } + + return true; +} + +static bool +parse_bool(struct lexer *lexer, bool *out) +{ + struct token token; + switch (lex(lexer, &token)) { + case T_FALSE: + *out = false; + return true; + case T_TRUE: + *out = true; + return true; + default: + unlex(lexer, &token); + return false; + } +} + +static bool +parse_const_global(struct lexer *lexer, struct ast_const_global *out) +{ + if (!match(lexer, T_CONST)) { + return false; + } + + struct token name; + if (lex(lexer, &name) != T_NAME) { + error(lex_loc(lexer), "syntax error: expected name"); + } + out->name = name.info.str; + + if (!match(lexer, T_ASSIGN)) { + error(lex_loc(lexer), "syntax error: expected '='"); + } + + struct token token; + if (lex(lexer, &token) == T_NUMBER) { + if (!match(lexer, T_SEMICOLON)) { + error(lex_loc(lexer), + "syntax error: expected semicolon"); + } + + out->kind = CST_NUMBER; + out->value.num = token.info.num; + return true; + } + unlex(lexer, &token); + + if (parse_bool(lexer, &out->value.b)) { + if (!match(lexer, T_SEMICOLON)) { + error(lex_loc(lexer), + "syntax error: expected semicolon"); + } + + out->kind = CST_BOOL; + return true; + } + + bool isstr = false; + out->value.str = must_malloc(1); + memset(out->value.str, 0, 1); + while (lex(lexer, &token) == T_STRING) { + size_t sz = strlen(out->value.str) + strlen(token.info.str) + 1; + out->value.str = must_realloc(out->value.str, sz); + strcat(out->value.str, token.info.str); + isstr = true; + } + unlex(lexer, &token); + + if (isstr) { + if (!match(lexer, T_SEMICOLON)) { + error(lex_loc(lexer), + "syntax error: 
expected semicolon"); + } + + out->kind = CST_STRING; + return true; + } + + if (parse_type(lexer, &out->value.type)) { + if (!match(lexer, T_SEMICOLON)) { + error(lex_loc(lexer), + "syntax error: expected semicolon"); + } + + out->kind = CST_TYPE; + return true; + } + + error(lex_loc(lexer), + "syntax error: expected number, bool, string or type"); +} + +static bool +parse_toplevel(struct lexer *lexer, struct ast_toplevel *out) +{ + struct ast_externfunc *extfn = + must_malloc(sizeof(struct ast_externfunc)); + if (parse_externfunc(lexer, extfn)) { + out->kind = TOP_EXTERNFUNC; + out->decl.extfn = extfn; + return true; + } + free(extfn); + + struct ast_func *func = must_malloc(sizeof(struct ast_func)); + if (parse_function(lexer, func)) { + out->kind = TOP_FUNC; + out->decl.function = func; + return true; + } + free(func); + + struct ast_const_global *c = + must_malloc(sizeof(struct ast_const_global)); + if (parse_const_global(lexer, c)) { + out->kind = TOP_CONST; + out->decl.constant = c; + return true; + } + free(c); + + if (!match(lexer, T_EOF)) { + error(lex_loc(lexer), + "syntax error: expected top-level declaration"); + } + + return false; +} + void parse(struct lexer *lexer, struct ast_unit *ast) { ast->imports = must_calloc(1, sizeof(struct ast_import)); + ast->tops = must_calloc(1, sizeof(struct ast_toplevel)); ast->impsz = 1; ast->implen = 0; + ast->topsz = 1; + ast->toplen = 0; struct ast_import import; while (parse_import(lexer, &import)) { @@ -600,4 +1193,14 @@ parse(struct lexer *lexer, struct ast_unit *ast) } ast->imports[ast->implen++] = import; } + + struct ast_toplevel top; + while (parse_toplevel(lexer, &top)) { + if (ast->toplen >= ast->topsz) { + ast->topsz *= 2; + size_t sz = sizeof(struct ast_toplevel) * ast->topsz; + ast->tops = must_realloc(ast->tops, sz); + } + ast->tops[ast->toplen++] = top; + } } |