diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/lex.h | 114 | ||||
-rw-r--r-- | include/utf8.h | 15 | ||||
-rw-r--r-- | include/util.h | 9 |
3 files changed, 138 insertions, 0 deletions
diff --git a/include/lex.h b/include/lex.h
new file mode 100644
index 0000000..23619d5
--- /dev/null
+++ b/include/lex.h
@@ -0,0 +1,114 @@
+#ifndef CERC_LEX_H
+#define CERC_LEX_H
+#include <stdio.h>
+#include "utf8.h"
+
+#define C_EOF UTF8_INVALID
+
+enum lexical_token {
+	// Keywords
+	T_BREAK,
+	T_CONST,
+	T_CONTINUE,
+	T_DEFER,
+	T_ELSE,
+	T_ENUM,
+	T_EXPORT,
+	T_EXTERN,
+	T_FOR,
+	T_FUNC,
+	T_IF,
+	T_IMPORT,
+	T_LET,
+	T_MUT,
+	T_PUB,
+	T_RETURN,
+	T_STRUCT,
+	T_UNION,
+	T_LAST_KEYWORD,
+
+	// Builtin types
+	T_F32,
+	T_F64,
+	T_I8,
+	T_I16,
+	T_I32,
+	T_I64,
+	T_ISIZE,
+	T_U8,
+	T_U16,
+	T_U32,
+	T_U64,
+	T_USIZE,
+	T_LAST_BUILTIN_TYPE,
+
+	// Operators
+	T_ADD,
+	T_AND,
+	T_BAND,
+	T_BANG,
+	T_BNOT,
+	T_BOR,
+	T_BSHL,
+	T_BSHR,
+	T_BXOR,
+	T_COLON,
+	T_COMMA,
+	T_DIV,
+	T_DOT,
+	T_EQ,
+	T_GE,
+	T_GT,
+	T_LBRACE,
+	T_LBRACKET,
+	T_LE,
+	T_LPAREN,
+	T_LT,
+	T_MINUS,
+	T_MODULO,
+	T_NEQ,
+	T_OR,
+	T_QUESTION,
+	T_RBRACE,
+	T_RBRACKET,
+	T_RPAREN,
+	T_SEMICOLON,
+	T_STAR,
+	T_UNDERSCORE,
+	T_LAST_OPERATOR,
+
+	// Tokens with additional information
+	T_IDENT,
+	T_NAME,
+	T_NUMBER,
+	T_STRING,
+
+	// Magic values
+	T_EOF,
+	T_NONE,
+};
+
+extern const char *tokens[];
+
+struct token {
+	enum lexical_token token;
+	union {
+		const char *str;
+	} info;
+};
+
+struct lexer {
+	FILE *in;
+	uint32_t c[2];
+	char *buf;
+	size_t bufsz, buflen;
+	struct token un;
+};
+
+void lex_init(struct lexer *lexer, FILE *f);
+void lex_finish(struct lexer *lexer);
+
+enum lexical_token lex(struct lexer *lexer, struct token *out);
+void unlex(struct lexer *lexer, const struct token *in);
+
+#endif
diff --git a/include/utf8.h b/include/utf8.h
new file mode 100644
index 0000000..f29dcd3
--- /dev/null
+++ b/include/utf8.h
@@ -0,0 +1,15 @@
+#ifndef CERC_UTF8_H
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define CERC_UTF8_H
+#define UTF8_MAX_SIZE 4
+#define UTF8_INVALID UINT32_MAX
+
+uint32_t utf8_decode(const char **s);
+size_t utf8_encode(char *s, uint32_t c);
+uint32_t utf8_get(FILE *f);
+
+#endif
diff --git a/include/util.h b/include/util.h
new file mode 100644
index 0000000..2a1e72a
--- /dev/null
+++ b/include/util.h
@@ -0,0 +1,9 @@
+#ifndef CERC_UTIL_H
+#define CERC_UTIL_H
+enum exit_status {
+	/* EXIT_SUCCESS = 0 (defined in stdlib.h) */
+	EXIT_USER = 1,
+	EXIT_ABNORMAL = 255,
+};
+
+#endif