aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHimbeer <himbeer@disroot.org>2024-09-26 14:13:21 +0200
committerHimbeer <himbeer@disroot.org>2024-09-26 14:13:21 +0200
commit8ea5034f93cc0d06cdad8bcefc02a57d9e562146 (patch)
tree42a2bf4920a4d415b6ab779f8fc1ed06e525bbb7
parent91730d20c1c2befe5ccb52f63f2d4200393ae291 (diff)
Implement type parsing
-rw-r--r--include/expr.h5
-rw-r--r--include/parse.h15
-rw-r--r--include/type.h24
-rw-r--r--include/util.h7
-rw-r--r--src/parse.c605
5 files changed, 635 insertions, 21 deletions
diff --git a/include/expr.h b/include/expr.h
index 8b4d8b0..5b0ce59 100644
--- a/include/expr.h
+++ b/include/expr.h
@@ -32,11 +32,6 @@ struct literal_e {
} lit;
};
-struct path {
- const char **segments;
- int segsz, seglen;
-};
-
struct call_e {
struct path path;
struct disjunction_e *args;
diff --git a/include/parse.h b/include/parse.h
index de30f39..981d8c6 100644
--- a/include/parse.h
+++ b/include/parse.h
@@ -9,7 +9,8 @@ struct ast_expr {
struct ast_externfunc {
const char *name;
struct type ret;
- struct type params[];
+ struct type *params;
+ int paramsz, paramlen;
};
struct ast_param {
@@ -24,11 +25,12 @@ struct ast_block {
};
struct ast_func {
- bool pub, exported;
+ bool exported;
const char *name;
- struct ast_block block;
struct type ret;
- struct ast_param params[];
+ struct ast_param *params;
+ int paramsz, paramlen;
+ struct ast_block block;
};
enum const_global {
@@ -39,14 +41,13 @@ enum const_global {
};
struct ast_const_global {
- bool pub;
const char *name;
enum const_global kind;
union {
- struct type *type;
+ struct type type;
bool b;
struct number num;
- const char *str;
+ char *str;
} value;
};
diff --git a/include/type.h b/include/type.h
index 78a3d18..c8a6b76 100644
--- a/include/type.h
+++ b/include/type.h
@@ -2,6 +2,12 @@
#define CERC_TYPE_H
#include <stdbool.h>
#include <stdint.h>
+#include "expr.h"
+#include "lex.h"
+#include "util.h"
+
+// Only 64-bit targets are supported
+#define PLATBITS 64
struct cer_int {
bool sign;
@@ -14,11 +20,7 @@ struct cer_float {
struct cer_array {
struct type *member_type;
- int length;
-};
-
-struct cer_slice {
- struct type *member_type;
+ struct disjunction_e *length;
};
struct field {
@@ -28,6 +30,7 @@ struct field {
struct cer_struct {
struct field *fields;
+ int fieldsz, fieldlen;
};
struct variant {
@@ -36,12 +39,15 @@ struct variant {
};
struct cer_enum {
- struct cer_int tag_type;
- struct variant variants[];
+ struct type *tag_type;
+ struct variant *variants;
+ int variantsz, variantlen;
};
struct cer_union {
+ struct type *tag_type;
struct type *alts;
+ int altsz, altlen;
};
enum type_kind {
@@ -52,6 +58,7 @@ enum type_kind {
TYP_FLOAT,
TYP_INT,
TYP_OPTIONAL,
+ TYP_PATH,
TYP_POINTER,
TYP_SLICE,
TYP_STRUCT,
@@ -62,10 +69,11 @@ union type_desc {
struct cer_int i;
struct cer_float f;
struct cer_array array;
- struct cer_slice slice;
struct cer_struct s;
struct cer_enum en;
struct cer_union un;
+ struct type *inner;
+ struct path path;
};
struct type {
diff --git a/include/util.h b/include/util.h
index 5a5a819..0e2b289 100644
--- a/include/util.h
+++ b/include/util.h
@@ -1,5 +1,7 @@
#ifndef CERC_UTIL_H
#define CERC_UTIL_H
+#include <stddef.h>
+
enum exit_status {
/* EXIT_SUCCESS = 0 (defined in stdlib.h) */
EXIT_USER = 1,
@@ -9,6 +11,11 @@ enum exit_status {
EXIT_ABNORMAL = 255,
};
+struct path {
+ const char **segments;
+ int segsz, seglen;
+};
+
void *must_malloc(size_t size);
void *must_calloc(size_t nmemb, size_t size);
void *must_realloc(void *ptr, size_t size);
diff --git a/src/parse.c b/src/parse.c
index 8789d37..b235b24 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -9,6 +9,7 @@
static bool parse_disjunction_e(struct lexer *lexer,
struct disjunction_e *out);
+static bool parse_type(struct lexer *lexer, struct type *out);
static noreturn void
error(struct location loc, const char *fmt, ...)
@@ -42,7 +43,8 @@ parse_path(struct lexer *lexer, struct path *out)
struct token name;
if (lex(lexer, &name) != T_NAME) {
- error(name.loc, "syntax error: expected path");
+ unlex(lexer, &name);
+ return false;
}
out->segments = must_calloc(1, sizeof(const char *));
@@ -584,12 +586,603 @@ parse_import(struct lexer *lexer, struct ast_import *out)
return true;
}
+/*
+ * Tries to parse one of the integer type tokens (T_INT8 ... T_UINT64,
+ * T_INT, T_UINT). On success *out becomes a TYP_INT with the matching
+ * signedness and bit width (PLATBITS for T_INT / T_UINT) and true is
+ * returned. Any other token is pushed back and false is returned.
+ */
+static bool
+parse_integer(struct lexer *lexer, struct type *out)
+{
+	struct token tok;
+	bool is_signed;
+	int width;
+
+	switch (lex(lexer, &tok)) {
+	case T_INT8:
+		is_signed = true;
+		width = 8;
+		break;
+	case T_UINT8:
+		is_signed = false;
+		width = 8;
+		break;
+	case T_INT16:
+		is_signed = true;
+		width = 16;
+		break;
+	case T_UINT16:
+		is_signed = false;
+		width = 16;
+		break;
+	case T_INT32:
+		is_signed = true;
+		width = 32;
+		break;
+	case T_UINT32:
+		is_signed = false;
+		width = 32;
+		break;
+	case T_INT64:
+		is_signed = true;
+		width = 64;
+		break;
+	case T_UINT64:
+		is_signed = false;
+		width = 64;
+		break;
+	case T_INT:
+		is_signed = true;
+		width = PLATBITS;
+		break;
+	case T_UINT:
+		is_signed = false;
+		width = PLATBITS;
+		break;
+	default:
+		// Not an integer type: leave the token for the caller
+		unlex(lexer, &tok);
+		return false;
+	}
+
+	out->kind = TYP_INT;
+	out->desc.i.sign = is_signed;
+	out->desc.i.bits = width;
+	return true;
+}
+
+/*
+ * Tries to parse a floating-point type token (T_FLOAT32 or T_FLOAT64).
+ * On success *out becomes a TYP_FLOAT with is64 set accordingly and
+ * true is returned. Any other token is pushed back and false is
+ * returned.
+ */
+static bool
+parse_float(struct lexer *lexer, struct type *out)
+{
+	struct token token;
+	switch (lex(lexer, &token)) {
+	case T_FLOAT32:
+		out->kind = TYP_FLOAT;
+		out->desc.f.is64 = false;
+		return true;
+	case T_FLOAT64:
+		out->kind = TYP_FLOAT;
+		out->desc.f.is64 = true;
+		// Must return here: falling through into the default case
+		// would push the token back and reject a valid T_FLOAT64.
+		return true;
+	default:
+		unlex(lexer, &token);
+		return false;
+	}
+}
+
+/*
+ * Tries to parse one struct field: a name followed by a type.
+ * Returns false, with the offending token pushed back, when the next
+ * token is not a name. A name that is not followed by a type is a
+ * fatal syntax error. On success out->name refers to the name token's
+ * string and out->type points to a newly allocated type.
+ */
+static bool
+parse_field(struct lexer *lexer, struct field *out)
+{
+	struct token name;
+	if (lex(lexer, &name) != T_NAME) {
+		// Push the token back (as parse_param does) so the caller
+		// can still match it, e.g. the closing '}' of the struct.
+		unlex(lexer, &name);
+		return false;
+	}
+	out->name = name.info.str;
+
+	struct type *type = must_malloc(sizeof(struct type));
+	if (!parse_type(lexer, type)) {
+		error(lex_loc(lexer), "syntax error: expected type");
+	}
+	out->type = type;
+
+	return true;
+}
+
+/*
+ * Tries to parse a struct type: T_STRUCT, '{', a comma-separated list
+ * of fields, '}'. Returns false if the T_STRUCT token is absent. A
+ * missing brace after the keyword is a fatal syntax error. On success
+ * *out is a TYP_STRUCT whose field array holds fieldlen entries
+ * (fieldsz is the allocated capacity).
+ */
+static bool
+parse_struct(struct lexer *lexer, struct type *out)
+{
+	if (!match(lexer, T_STRUCT)) {
+		return false;
+	}
+	if (!match(lexer, T_LBRACE)) {
+		error(lex_loc(lexer), "syntax error: expected '{'");
+	}
+
+	struct cer_struct *desc = &out->desc.s;
+	out->kind = TYP_STRUCT;
+	desc->fields = must_calloc(1, sizeof(struct field));
+	desc->fieldsz = 1;
+	desc->fieldlen = 0;
+
+	for (;;) {
+		struct field next;
+		if (!parse_field(lexer, &next)) {
+			break;
+		}
+
+		// Double the capacity whenever the array is full
+		if (desc->fieldlen >= desc->fieldsz) {
+			desc->fieldsz *= 2;
+			size_t bytes = sizeof(struct field) * desc->fieldsz;
+			desc->fields = must_realloc(desc->fields, bytes);
+		}
+		desc->fields[desc->fieldlen] = next;
+		desc->fieldlen++;
+
+		if (!match(lexer, T_COMMA)) {
+			break;
+		}
+	}
+
+	if (!match(lexer, T_RBRACE)) {
+		error(lex_loc(lexer), "syntax error: expected '}'");
+	}
+
+	return true;
+}
+
+/*
+ * Tries to parse an enum type:
+ *   T_ENUM [ '(' tag-type ')' ] [ '=' expression ] '{' names '}'
+ * Returns false if the T_ENUM token is absent; every later mismatch is
+ * a fatal syntax error. The tag type defaults to an unsigned integer of
+ * PLATBITS bits. Variants receive consecutive tags starting at 0.
+ */
+static bool
+parse_enum(struct lexer *lexer, struct type *out)
+{
+	if (!match(lexer, T_ENUM)) {
+		return false;
+	}
+
+	out->kind = TYP_ENUM;
+	// Default tag type is uint.
+	// must_malloc (not plain malloc): the result is dereferenced
+	// immediately, so an unchecked NULL would crash here.
+	out->desc.en.tag_type = must_malloc(sizeof(struct type));
+	out->desc.en.tag_type->kind = TYP_INT;
+	out->desc.en.tag_type->desc.i.sign = false;
+	out->desc.en.tag_type->desc.i.bits = PLATBITS;
+
+	if (match(lexer, T_LPAREN)) {
+		if (!parse_type(lexer, out->desc.en.tag_type)) {
+			error(lex_loc(lexer),
+				"syntax error: expected tag type");
+		}
+
+		if (!match(lexer, T_RPAREN)) {
+			error(lex_loc(lexer), "syntax error: expected ')'");
+		}
+	}
+
+	// NOTE(review): the expression after '=' is parsed but its result
+	// is not stored anywhere yet - confirm the intended use.
+	struct disjunction_e init;
+	if (match(lexer, T_ASSIGN)) {
+		if (!parse_disjunction_e(lexer, &init)) {
+			error(lex_loc(lexer),
+				"syntax error: expected expression");
+		}
+	}
+
+	if (!match(lexer, T_LBRACE)) {
+		error(lex_loc(lexer), "syntax error: expected '{'");
+	}
+
+	out->desc.en.variants = must_calloc(1, sizeof(struct variant));
+	out->desc.en.variantsz = 1;
+	out->desc.en.variantlen = 0;
+
+	struct token name;
+	uint64_t tag = 0;
+	// True when the loop ended because no comma followed a variant;
+	// in that case `name` was consumed as a variant and must not be
+	// pushed back.
+	bool commabreak = false;
+	while (lex(lexer, &name) == T_NAME) {
+		if (out->desc.en.variantlen >= out->desc.en.variantsz) {
+			out->desc.en.variantsz *= 2;
+			size_t sz = sizeof(struct variant) *
+				out->desc.en.variantsz;
+			out->desc.en.variants = must_realloc(
+				out->desc.en.variants, sz);
+		}
+		out->desc.en.variants[out->desc.en.variantlen].name =
+			name.info.str;
+		out->desc.en.variants[out->desc.en.variantlen++].tag = tag++;
+
+		if (!match(lexer, T_COMMA)) {
+			commabreak = true;
+			break;
+		}
+	}
+	if (!commabreak) {
+		// The loop stopped on a non-name token: give it back
+		unlex(lexer, &name);
+	}
+
+	if (!match(lexer, T_RBRACE)) {
+		error(lex_loc(lexer), "syntax error: expected '}'");
+	}
+
+	return true;
+}
+
+/*
+ * Tries to parse a union type: T_UNION, '{', a comma-separated list of
+ * alternative types, '}'. Returns false if the T_UNION token is
+ * absent. A missing brace after the keyword is a fatal syntax error.
+ * On success *out is a TYP_UNION with a NULL tag type and altlen
+ * alternatives (altsz is the allocated capacity).
+ */
+static bool
+parse_union(struct lexer *lexer, struct type *out)
+{
+	if (!match(lexer, T_UNION)) {
+		return false;
+	}
+	if (!match(lexer, T_LBRACE)) {
+		error(lex_loc(lexer), "syntax error: expected '{'");
+	}
+
+	struct cer_union *desc = &out->desc.un;
+	out->kind = TYP_UNION;
+	desc->tag_type = NULL;
+	desc->alts = must_calloc(1, sizeof(struct type));
+	desc->altsz = 1;
+	desc->altlen = 0;
+
+	for (;;) {
+		struct type member;
+		if (!parse_type(lexer, &member)) {
+			break;
+		}
+
+		// Double the capacity whenever the array is full
+		if (desc->altlen >= desc->altsz) {
+			desc->altsz *= 2;
+			size_t bytes = sizeof(struct type) * desc->altsz;
+			desc->alts = must_realloc(desc->alts, bytes);
+		}
+		desc->alts[desc->altlen] = member;
+		desc->altlen++;
+
+		if (!match(lexer, T_COMMA)) {
+			break;
+		}
+	}
+
+	if (!match(lexer, T_RBRACE)) {
+		error(lex_loc(lexer), "syntax error: expected '}'");
+	}
+
+	return true;
+}
+
+/*
+ * Tries to parse an array or slice type: '[' [ length ] ']' member-type.
+ * Returns false if the opening bracket is absent. With a length
+ * expression *out becomes a TYP_ARRAY owning that expression; without
+ * one it becomes a TYP_SLICE whose length pointer is NULL. A missing
+ * ']' or member type is a fatal syntax error.
+ */
+static bool
+parse_array(struct lexer *lexer, struct type *out)
+{
+	if (!match(lexer, T_LBRACKET)) {
+		return false;
+	}
+
+	struct disjunction_e *length =
+		must_malloc(sizeof(struct disjunction_e));
+	if (parse_disjunction_e(lexer, length)) {
+		out->kind = TYP_ARRAY;
+		out->desc.array.length = length;
+	} else {
+		// No length expression: this is a slice. Release the unused
+		// node instead of leaving an uninitialised expression
+		// attached to the type.
+		free(length);
+		out->kind = TYP_SLICE;
+		out->desc.array.length = NULL;
+	}
+
+	if (!match(lexer, T_RBRACKET)) {
+		error(lex_loc(lexer), "syntax error: expected ']'");
+	}
+
+	struct type *type = must_malloc(sizeof(struct type));
+	if (!parse_type(lexer, type)) {
+		error(lex_loc(lexer), "syntax error: expected type");
+	}
+	out->desc.array.member_type = type;
+
+	return true;
+}
+
+/*
+ * Tries to parse any type.
+ *
+ * T_STAR, T_QUESTION and T_BANG introduce a pointer, optional or
+ * fallible type wrapping the type that follows; T_BOOL is the boolean
+ * type. Otherwise the integer, float, struct, enum, union, array/slice
+ * and path parsers are tried in that order.
+ *
+ * Returns true and fills *out on success, false if none of the
+ * alternatives matched. A wrapper prefix that is not followed by a
+ * type is a fatal syntax error: the prefix has already been consumed,
+ * so returning false would make callers treat the input as "no type
+ * here" and silently drop the prefix.
+ */
+static bool
+parse_type(struct lexer *lexer, struct type *out)
+{
+	struct token token;
+	bool wrapper = true;
+	switch (lex(lexer, &token)) {
+	case T_STAR:
+		out->kind = TYP_POINTER;
+		break;
+	case T_QUESTION:
+		out->kind = TYP_OPTIONAL;
+		break;
+	case T_BANG:
+		out->kind = TYP_FALLIBLE;
+		break;
+	case T_BOOL:
+		out->kind = TYP_BOOL;
+		return true;
+	default:
+		unlex(lexer, &token);
+		wrapper = false;
+		break;
+	}
+
+	if (wrapper) {
+		out->desc.inner = must_malloc(sizeof(struct type));
+		if (!parse_type(lexer, out->desc.inner)) {
+			error(lex_loc(lexer), "syntax error: expected type");
+		}
+		return true;
+	}
+
+	if (parse_integer(lexer, out)) {
+		return true;
+	}
+	if (parse_float(lexer, out)) {
+		return true;
+	}
+	if (parse_struct(lexer, out)) {
+		return true;
+	}
+	if (parse_enum(lexer, out)) {
+		return true;
+	}
+	if (parse_union(lexer, out)) {
+		return true;
+	}
+	if (parse_array(lexer, out)) {
+		return true;
+	}
+
+	// Anything else that starts with a name is a reference to a
+	// named type
+	struct path path;
+	if (parse_path(lexer, &path)) {
+		out->kind = TYP_PATH;
+		out->desc.path = path;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Tries to parse an external function declaration:
+ *   T_EXTERN T_FUNC name '(' [ type { ',' type } ] ')' [ return-type ] ';'
+ * Returns false, without allocating anything, if the T_EXTERN token is
+ * absent; every later mismatch is a fatal syntax error. On success
+ * out->params holds paramlen parameter types (paramsz is the allocated
+ * capacity). The return type is optional; when it is absent out->ret
+ * is left unmodified.
+ */
+static bool
+parse_externfunc(struct lexer *lexer, struct ast_externfunc *out)
+{
+	if (!match(lexer, T_EXTERN)) {
+		return false;
+	}
+	if (!match(lexer, T_FUNC)) {
+		error(lex_loc(lexer), "syntax error: expected keyword 'func'");
+	}
+
+	struct token name;
+	if (lex(lexer, &name) != T_NAME) {
+		error(lex_loc(lexer), "syntax error: expected name");
+	}
+	out->name = name.info.str;
+
+	if (!match(lexer, T_LPAREN)) {
+		error(lex_loc(lexer), "syntax error: expected '('");
+	}
+
+	// Allocate only once we know this is an extern declaration, so
+	// that the "not an extern func" path does not leak the array.
+	out->params = must_calloc(1, sizeof(struct type));
+	out->paramsz = 1;
+	out->paramlen = 0;
+
+	struct type paramtype;
+	while (parse_type(lexer, &paramtype)) {
+		if (out->paramlen >= out->paramsz) {
+			out->paramsz *= 2;
+			size_t sz = sizeof(struct type) * out->paramsz;
+			out->params = must_realloc(out->params, sz);
+		}
+		out->params[out->paramlen++] = paramtype;
+
+		if (!match(lexer, T_COMMA)) {
+			break;
+		}
+	}
+
+	if (!match(lexer, T_RPAREN)) {
+		error(lex_loc(lexer), "syntax error: expected ')'");
+	}
+
+	// Optional return type; the result is deliberately ignored
+	(void)parse_type(lexer, &out->ret);
+
+	if (!match(lexer, T_SEMICOLON)) {
+		error(lex_loc(lexer), "syntax error: expected semicolon");
+	}
+
+	return true;
+}
+
+/*
+ * Tries to parse a block. Only an empty pair of braces is recognised
+ * so far and *out is not written. Returns false if the opening brace
+ * is absent; a missing closing brace is a fatal syntax error.
+ */
+static bool
+parse_block(struct lexer *lexer, struct ast_block *out)
+{
+	if (match(lexer, T_LBRACE)) {
+		/* TODO */
+
+		if (match(lexer, T_RBRACE)) {
+			return true;
+		}
+		error(lex_loc(lexer), "syntax error: expected '}'");
+	}
+
+	return false;
+}
+
+/*
+ * Tries to parse one function parameter: a name followed by a type.
+ * Returns false, with the token pushed back, when the next token is
+ * not a name. A name without a following type is a fatal syntax error.
+ */
+static bool
+parse_param(struct lexer *lexer, struct ast_param *out)
+{
+	struct token ident;
+	if (lex(lexer, &ident) == T_NAME) {
+		out->name = ident.info.str;
+		if (parse_type(lexer, &out->type)) {
+			return true;
+		}
+		error(lex_loc(lexer), "syntax error: expected type");
+	}
+
+	// Not a parameter: leave the token for the caller
+	unlex(lexer, &ident);
+	return false;
+}
+
+/*
+ * Tries to parse a function definition:
+ *   [ T_EXPORT ] T_FUNC name '(' [ param { ',' param } ] ')'
+ *   [ return-type ] block
+ * Returns false, without allocating anything, if the T_FUNC token is
+ * absent and no T_EXPORT preceded it; T_EXPORT without T_FUNC and every
+ * later mismatch are fatal syntax errors. On success out->params holds
+ * paramlen parameters (paramsz is the allocated capacity). The return
+ * type is optional; when it is absent out->ret is left unmodified.
+ */
+static bool
+parse_function(struct lexer *lexer, struct ast_func *out)
+{
+	out->exported = match(lexer, T_EXPORT);
+
+	if (!match(lexer, T_FUNC)) {
+		if (out->exported) {
+			error(lex_loc(lexer),
+				"syntax error: expected keyword 'func'");
+		}
+		return false;
+	}
+
+	struct token name;
+	if (lex(lexer, &name) != T_NAME) {
+		error(lex_loc(lexer), "syntax error: expected name");
+	}
+	out->name = name.info.str;
+
+	if (!match(lexer, T_LPAREN)) {
+		error(lex_loc(lexer), "syntax error: expected '('");
+	}
+
+	// Allocate only once we know this is a function, so that the
+	// "not a function" path does not leak the array.
+	out->params = must_calloc(1, sizeof(struct ast_param));
+	out->paramsz = 1;
+	out->paramlen = 0;
+
+	struct ast_param param;
+	while (parse_param(lexer, &param)) {
+		if (out->paramlen >= out->paramsz) {
+			out->paramsz *= 2;
+			size_t sz = sizeof(struct ast_param) * out->paramsz;
+			out->params = must_realloc(out->params, sz);
+		}
+		out->params[out->paramlen++] = param;
+
+		if (!match(lexer, T_COMMA)) {
+			break;
+		}
+	}
+
+	if (!match(lexer, T_RPAREN)) {
+		error(lex_loc(lexer), "syntax error: expected ')'");
+	}
+
+	// Optional return type; the result is deliberately ignored
+	(void)parse_type(lexer, &out->ret);
+
+	if (!parse_block(lexer, &out->block)) {
+		error(lex_loc(lexer), "syntax error: expected block");
+	}
+
+	return true;
+}
+
+/*
+ * Tries to parse a boolean literal (T_TRUE or T_FALSE) into *out.
+ * Any other token is pushed back, *out is left untouched and false is
+ * returned.
+ */
+static bool
+parse_bool(struct lexer *lexer, bool *out)
+{
+	struct token tok;
+	bool value;
+
+	switch (lex(lexer, &tok)) {
+	case T_TRUE:
+		value = true;
+		break;
+	case T_FALSE:
+		value = false;
+		break;
+	default:
+		unlex(lexer, &tok);
+		return false;
+	}
+
+	*out = value;
+	return true;
+}
+
+/*
+ * Tries to parse a global constant:
+ *   T_CONST name '=' ( number | bool | string { string } | type ) ';'
+ * Returns false if the T_CONST token is absent; every later mismatch is
+ * a fatal syntax error. Adjacent string tokens are concatenated into
+ * one heap-allocated string stored in out->value.str.
+ */
+static bool
+parse_const_global(struct lexer *lexer, struct ast_const_global *out)
+{
+	if (!match(lexer, T_CONST)) {
+		return false;
+	}
+
+	struct token name;
+	if (lex(lexer, &name) != T_NAME) {
+		error(lex_loc(lexer), "syntax error: expected name");
+	}
+	out->name = name.info.str;
+
+	if (!match(lexer, T_ASSIGN)) {
+		error(lex_loc(lexer), "syntax error: expected '='");
+	}
+
+	struct token token;
+	if (lex(lexer, &token) == T_NUMBER) {
+		if (!match(lexer, T_SEMICOLON)) {
+			error(lex_loc(lexer),
+				"syntax error: expected semicolon");
+		}
+
+		out->kind = CST_NUMBER;
+		out->value.num = token.info.num;
+		return true;
+	}
+	unlex(lexer, &token);
+
+	if (parse_bool(lexer, &out->value.b)) {
+		if (!match(lexer, T_SEMICOLON)) {
+			error(lex_loc(lexer),
+				"syntax error: expected semicolon");
+		}
+
+		out->kind = CST_BOOL;
+		return true;
+	}
+
+	// Build the string in a local buffer. Writing it straight into
+	// the value union would leak it whenever no string follows,
+	// because the type parser below overwrites the union.
+	char *str = must_malloc(1);
+	str[0] = '\0';
+	size_t len = 0;
+	bool isstr = false;
+	while (lex(lexer, &token) == T_STRING) {
+		size_t piece = strlen(token.info.str);
+		str = must_realloc(str, len + piece + 1);
+		// Copies the terminating NUL as well
+		memcpy(str + len, token.info.str, piece + 1);
+		len += piece;
+		isstr = true;
+	}
+	unlex(lexer, &token);
+
+	if (isstr) {
+		if (!match(lexer, T_SEMICOLON)) {
+			error(lex_loc(lexer),
+				"syntax error: expected semicolon");
+		}
+
+		out->kind = CST_STRING;
+		out->value.str = str;
+		return true;
+	}
+	free(str);
+
+	if (parse_type(lexer, &out->value.type)) {
+		if (!match(lexer, T_SEMICOLON)) {
+			error(lex_loc(lexer),
+				"syntax error: expected semicolon");
+		}
+
+		out->kind = CST_TYPE;
+		return true;
+	}
+
+	error(lex_loc(lexer),
+		"syntax error: expected number, bool, string or type");
+}
+
+/*
+ * Tries to parse one top-level declaration, trying an external
+ * function, then a function, then a global constant. On success *out
+ * records the kind and takes ownership of the heap-allocated
+ * declaration. Returns false at end of input; anything else that is
+ * not a declaration is a fatal syntax error.
+ */
+static bool
+parse_toplevel(struct lexer *lexer, struct ast_toplevel *out)
+{
+	struct ast_externfunc *ext = must_malloc(sizeof(*ext));
+	if (parse_externfunc(lexer, ext)) {
+		out->decl.extfn = ext;
+		out->kind = TOP_EXTERNFUNC;
+		return true;
+	}
+	free(ext);
+
+	struct ast_func *fn = must_malloc(sizeof(*fn));
+	if (parse_function(lexer, fn)) {
+		out->decl.function = fn;
+		out->kind = TOP_FUNC;
+		return true;
+	}
+	free(fn);
+
+	struct ast_const_global *cst = must_malloc(sizeof(*cst));
+	if (parse_const_global(lexer, cst)) {
+		out->decl.constant = cst;
+		out->kind = TOP_CONST;
+		return true;
+	}
+	free(cst);
+
+	// Nothing matched: only end of input is acceptable here
+	if (match(lexer, T_EOF)) {
+		return false;
+	}
+	error(lex_loc(lexer),
+		"syntax error: expected top-level declaration");
+}
+
void
parse(struct lexer *lexer, struct ast_unit *ast)
{
ast->imports = must_calloc(1, sizeof(struct ast_import));
+ ast->tops = must_calloc(1, sizeof(struct ast_toplevel));
ast->impsz = 1;
ast->implen = 0;
+ ast->topsz = 1;
+ ast->toplen = 0;
struct ast_import import;
while (parse_import(lexer, &import)) {
@@ -600,4 +1193,14 @@ parse(struct lexer *lexer, struct ast_unit *ast)
}
ast->imports[ast->implen++] = import;
}
+
+ struct ast_toplevel top;
+ while (parse_toplevel(lexer, &top)) {
+ if (ast->toplen >= ast->topsz) {
+ ast->topsz *= 2;
+ size_t sz = sizeof(struct ast_toplevel) * ast->topsz;
+ ast->tops = must_realloc(ast->tops, sz);
+ }
+ ast->tops[ast->toplen++] = top;
+ }
}