diff options
Diffstat (limited to 'include/lex.h')
-rw-r--r-- | include/lex.h | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/include/lex.h b/include/lex.h new file mode 100644 index 0000000..23619d5 --- /dev/null +++ b/include/lex.h @@ -0,0 +1,114 @@ +#ifndef CERC_LEX_H +#define CERC_LEX_H +#include <stdio.h> +#include "utf8.h" + +#define C_EOF UTF8_INVALID + +enum lexical_token { + // Keywords + T_BREAK, + T_CONST, + T_CONTINUE, + T_DEFER, + T_ELSE, + T_ENUM, + T_EXPORT, + T_EXTERN, + T_FOR, + T_FUNC, + T_IF, + T_IMPORT, + T_LET, + T_MUT, + T_PUB, + T_RETURN, + T_STRUCT, + T_UNION, + T_LAST_KEYWORD, + + // Builtin types + T_F32, + T_F64, + T_I8, + T_I16, + T_I32, + T_I64, + T_ISIZE, + T_U8, + T_U16, + T_U32, + T_U64, + T_USIZE, + T_LAST_BUILTIN_TYPE, + + // Operators + T_ADD, + T_AND, + T_BAND, + T_BANG, + T_BNOT, + T_BOR, + T_BSHL, + T_BSHR, + T_BXOR, + T_COLON, + T_COMMA, + T_DIV, + T_DOT, + T_EQ, + T_GE, + T_GT, + T_LBRACE, + T_LBRACKET, + T_LE, + T_LPAREN, + T_LT, + T_MINUS, + T_MODULO, + T_NEQ, + T_OR, + T_QUESTION, + T_RBRACE, + T_RBRACKET, + T_RPAREN, + T_SEMICOLON, + T_STAR, + T_UNDERSCORE, + T_LAST_OPERATOR, + + // Tokens with additional information + T_IDENT, + T_NAME, + T_NUMBER, + T_STRING, + + // Magic values + T_EOF, + T_NONE, +}; + +extern const char *tokens[]; + +struct token { + enum lexical_token token; + union { + const char *str; + } info; +}; + +struct lexer { + FILE *in; + uint32_t c[2]; + char *buf; + size_t bufsz, buflen; + struct token un; +}; + +void lex_init(struct lexer *lexer, FILE *f); +void lex_finish(struct lexer *lexer); + +enum lexical_token lex(struct lexer *lexer, struct token *out); +void unlex(struct lexer *lexer, const struct token *in); + +#endif |