diff options
author | Himbeer <himbeer@disroot.org> | 2024-09-11 22:51:44 +0200 |
---|---|---|
committer | Himbeer <himbeer@disroot.org> | 2024-09-11 22:51:44 +0200 |
commit | 86d91d38ba462ef35a7960c4b91e2b0c123c9158 (patch) | |
tree | cfbccf96de45686cdc4d6a8585f1fc4b0ee51e6f /include/lex.h | |
parent | 7091ec2009ae8446802ed3c55b174208a53e2999 (diff) |
Add lexer for names and identifiers
Diffstat (limited to 'include/lex.h')
-rw-r--r-- | include/lex.h | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/include/lex.h b/include/lex.h new file mode 100644 index 0000000..23619d5 --- /dev/null +++ b/include/lex.h @@ -0,0 +1,114 @@ +#ifndef CERC_LEX_H +#define CERC_LEX_H +#include <stdio.h> +#include "utf8.h" + +#define C_EOF UTF8_INVALID + +enum lexical_token { + // Keywords + T_BREAK, + T_CONST, + T_CONTINUE, + T_DEFER, + T_ELSE, + T_ENUM, + T_EXPORT, + T_EXTERN, + T_FOR, + T_FUNC, + T_IF, + T_IMPORT, + T_LET, + T_MUT, + T_PUB, + T_RETURN, + T_STRUCT, + T_UNION, + T_LAST_KEYWORD, + + // Builtin types + T_F32, + T_F64, + T_I8, + T_I16, + T_I32, + T_I64, + T_ISIZE, + T_U8, + T_U16, + T_U32, + T_U64, + T_USIZE, + T_LAST_BUILTIN_TYPE, + + // Operators + T_ADD, + T_AND, + T_BAND, + T_BANG, + T_BNOT, + T_BOR, + T_BSHL, + T_BSHR, + T_BXOR, + T_COLON, + T_COMMA, + T_DIV, + T_DOT, + T_EQ, + T_GE, + T_GT, + T_LBRACE, + T_LBRACKET, + T_LE, + T_LPAREN, + T_LT, + T_MINUS, + T_MODULO, + T_NEQ, + T_OR, + T_QUESTION, + T_RBRACE, + T_RBRACKET, + T_RPAREN, + T_SEMICOLON, + T_STAR, + T_UNDERSCORE, + T_LAST_OPERATOR, + + // Tokens with additional information + T_IDENT, + T_NAME, + T_NUMBER, + T_STRING, + + // Magic values + T_EOF, + T_NONE, +}; + +extern const char *tokens[]; + +struct token { + enum lexical_token token; + union { + const char *str; + } info; +}; + +struct lexer { + FILE *in; + uint32_t c[2]; + char *buf; + size_t bufsz, buflen; + struct token un; +}; + +void lex_init(struct lexer *lexer, FILE *f); +void lex_finish(struct lexer *lexer); + +enum lexical_token lex(struct lexer *lexer, struct token *out); +void unlex(struct lexer *lexer, const struct token *in); + +#endif |