aboutsummaryrefslogtreecommitdiff
path: root/include/lex.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/lex.h')
-rw-r--r-- include/lex.h 114
1 files changed, 114 insertions, 0 deletions
diff --git a/include/lex.h b/include/lex.h
new file mode 100644
index 0000000..23619d5
--- /dev/null
+++ b/include/lex.h
@@ -0,0 +1,114 @@
+#ifndef CERC_LEX_H
+#define CERC_LEX_H
+#include <stdio.h>
+#include "utf8.h"
+
+#define C_EOF UTF8_INVALID // Alias for UTF8_INVALID; NOTE(review): presumably the codepoint value the lexer uses for end of input — confirm
+
+enum lexical_token { // Token kinds returned by lex(), grouped by category; values depend on declaration order, so do not reorder
+ // Keywords
+ T_BREAK,
+ T_CONST,
+ T_CONTINUE,
+ T_DEFER,
+ T_ELSE,
+ T_ENUM,
+ T_EXPORT,
+ T_EXTERN,
+ T_FOR,
+ T_FUNC,
+ T_IF,
+ T_IMPORT,
+ T_LET,
+ T_MUT,
+ T_PUB,
+ T_RETURN,
+ T_STRUCT,
+ T_UNION,
+ T_LAST_KEYWORD, // NOTE(review): looks like a range sentinel closing the keyword group rather than a real token — confirm
+
+ // Builtin types
+ T_F32,
+ T_F64,
+ T_I8,
+ T_I16,
+ T_I32,
+ T_I64,
+ T_ISIZE,
+ T_U8,
+ T_U16,
+ T_U32,
+ T_U64,
+ T_USIZE,
+ T_LAST_BUILTIN_TYPE, // NOTE(review): looks like a range sentinel closing the builtin-type group — confirm
+
+ // Operators
+ T_ADD,
+ T_AND,
+ T_BAND,
+ T_BANG,
+ T_BNOT,
+ T_BOR,
+ T_BSHL,
+ T_BSHR,
+ T_BXOR,
+ T_COLON,
+ T_COMMA,
+ T_DIV,
+ T_DOT,
+ T_EQ,
+ T_GE,
+ T_GT,
+ T_LBRACE,
+ T_LBRACKET,
+ T_LE,
+ T_LPAREN,
+ T_LT,
+ T_MINUS,
+ T_MODULO,
+ T_NEQ,
+ T_OR,
+ T_QUESTION,
+ T_RBRACE,
+ T_RBRACKET,
+ T_RPAREN,
+ T_SEMICOLON,
+ T_STAR,
+ T_UNDERSCORE,
+ T_LAST_OPERATOR, // NOTE(review): looks like a range sentinel closing the operator group — confirm
+
+ // Tokens with additional information (NOTE(review): presumably carried in struct token's info union — confirm)
+ T_IDENT,
+ T_NAME,
+ T_NUMBER,
+ T_STRING,
+
+ // Magic values
+ T_EOF,
+ T_NONE,
+};
+
+extern const char *tokens[]; // Declaration only; the array is defined elsewhere. NOTE(review): presumably indexed by enum lexical_token to get a printable name — confirm
+
+struct token { // One lexed token: its kind plus an info union for extra data
+ enum lexical_token token; // kind of this token
+ union {
+ const char *str; // NOTE(review): presumably the text for T_IDENT/T_NAME/T_STRING tokens; ownership is not visible here — confirm
+ } info;
+};
+
+struct lexer { // Lexer state; passed by pointer to lex_init, lex_finish, lex and unlex
+ FILE *in; // stdio stream handle; NOTE(review): presumably the source being lexed — confirm
+ uint32_t c[2]; // NOTE(review): looks like up to two buffered codepoints; uint32_t needs <stdint.h>, assumed to arrive via utf8.h — confirm
+ char *buf; // NOTE(review): presumably scratch storage for the token text being read — confirm
+ size_t bufsz, buflen; // NOTE(review): presumably capacity and used length of buf — confirm
+ struct token un; // NOTE(review): presumably the token saved by unlex() — confirm
+};
+
+void lex_init(struct lexer *lexer, FILE *f); // NOTE(review): presumably prepares *lexer to read from f; implementation not visible — confirm
+void lex_finish(struct lexer *lexer); // NOTE(review): presumably releases resources held by *lexer; whether it closes the FILE is not visible — confirm
+
+enum lexical_token lex(struct lexer *lexer, struct token *out); // Returns a token kind; NOTE(review): presumably also fills *out with the same token — confirm
+void unlex(struct lexer *lexer, const struct token *in); // Takes the token read-only; NOTE(review): presumably pushes it back so the next lex() returns it — confirm
+
+#endif