aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Himbeer <himbeer@disroot.org> 2024-09-11 22:51:44 +0200
committer Himbeer <himbeer@disroot.org> 2024-09-11 22:51:44 +0200
commit 86d91d38ba462ef35a7960c4b91e2b0c123c9158 (patch)
tree cfbccf96de45686cdc4d6a8585f1fc4b0ee51e6f /include
parent 7091ec2009ae8446802ed3c55b174208a53e2999 (diff)
Add lexer for names and identifiers
Diffstat (limited to 'include')
-rw-r--r-- include/lex.h | 114
-rw-r--r-- include/utf8.h | 15
-rw-r--r-- include/util.h | 9
3 files changed, 138 insertions, 0 deletions
diff --git a/include/lex.h b/include/lex.h
new file mode 100644
index 0000000..23619d5
--- /dev/null
+++ b/include/lex.h
@@ -0,0 +1,114 @@
+#ifndef CERC_LEX_H
+#define CERC_LEX_H
+#include <stdio.h>
+#include "utf8.h"
+
+#define C_EOF UTF8_INVALID
+
+enum lexical_token { // Token kinds; no explicit values, so T_BREAK == 0 and each entry is previous + 1
+ // Keywords
+ T_BREAK,
+ T_CONST,
+ T_CONTINUE,
+ T_DEFER,
+ T_ELSE,
+ T_ENUM,
+ T_EXPORT,
+ T_EXTERN,
+ T_FOR,
+ T_FUNC,
+ T_IF,
+ T_IMPORT,
+ T_LET,
+ T_MUT,
+ T_PUB,
+ T_RETURN,
+ T_STRUCT,
+ T_UNION,
+ T_LAST_KEYWORD, // group marker: equals T_UNION + 1, i.e. one past the final keyword, not a keyword itself
+
+ // Builtin types
+ T_F32,
+ T_F64,
+ T_I8,
+ T_I16,
+ T_I32,
+ T_I64,
+ T_ISIZE,
+ T_U8,
+ T_U16,
+ T_U32,
+ T_U64,
+ T_USIZE,
+ T_LAST_BUILTIN_TYPE, // group marker: equals T_USIZE + 1, one past the final builtin type
+
+ // Operators
+ T_ADD,
+ T_AND,
+ T_BAND,
+ T_BANG,
+ T_BNOT,
+ T_BOR,
+ T_BSHL,
+ T_BSHR,
+ T_BXOR,
+ T_COLON,
+ T_COMMA,
+ T_DIV,
+ T_DOT,
+ T_EQ,
+ T_GE,
+ T_GT,
+ T_LBRACE,
+ T_LBRACKET,
+ T_LE,
+ T_LPAREN,
+ T_LT,
+ T_MINUS,
+ T_MODULO,
+ T_NEQ,
+ T_OR,
+ T_QUESTION,
+ T_RBRACE,
+ T_RBRACKET,
+ T_RPAREN,
+ T_SEMICOLON,
+ T_STAR,
+ T_UNDERSCORE,
+ T_LAST_OPERATOR, // group marker: equals T_UNDERSCORE + 1, one past the final operator
+
+ // Tokens with additional information
+ T_IDENT,
+ T_NAME,
+ T_NUMBER,
+ T_STRING,
+
+ // Magic values
+ T_EOF,
+ T_NONE,
+};
+
+extern const char *tokens[]; // declaration only, defined in another translation unit; presumably indexed by enum lexical_token — confirm
+
+struct token { // One lexed token: its kind plus an optional payload
+ enum lexical_token token; // which kind of token this is
+ union { // currently has a single member
+ const char *str; // presumably the text for the "additional information" kinds (T_IDENT, T_NAME, ...) — confirm
+ } info;
+};
+
+struct lexer { // Lexer state passed to lex_init, lex_finish, lex and unlex
+ FILE *in; // stdio stream; presumably the source being lexed (lex_init takes a FILE *) — confirm
+ uint32_t c[2]; // two uint32_t slots, the same type utf8_get returns; presumably buffered code points — confirm
+ char *buf; // NOTE(review): ownership and contents are not visible in this header
+ size_t bufsz, buflen; // presumably capacity and used length of buf — confirm
+ struct token un; // presumably the token stored by unlex() for the next lex() call — confirm
+};
+
+void lex_init(struct lexer *lexer, FILE *f); // prototype only; presumably sets up *lexer to read from f — confirm in the implementation
+void lex_finish(struct lexer *lexer); // prototype only; presumably releases whatever lex_init set up — confirm
+
+enum lexical_token lex(struct lexer *lexer, struct token *out); // returns a token kind; out is non-const, so presumably filled with the token — confirm
+void unlex(struct lexer *lexer, const struct token *in); // in is const and only read; presumably pushes the token back for the next lex() — confirm
+
+#endif
diff --git a/include/utf8.h b/include/utf8.h
new file mode 100644
index 0000000..f29dcd3
--- /dev/null
+++ b/include/utf8.h
@@ -0,0 +1,15 @@
+#ifndef CERC_UTF8_H
+#include <limits.h> // NOTE(review): nothing declared in this header itself needs <limits.h>
+#include <stddef.h> // size_t
+#include <stdint.h> // uint32_t, UINT32_MAX
+#include <stdio.h> // FILE
+
+#define CERC_UTF8_H // include-guard macro for the #ifndef above; note it is defined after the includes
+#define UTF8_MAX_SIZE 4 // presumably the maximum encoded length in bytes — confirm against utf8_encode
+#define UTF8_INVALID UINT32_MAX // reserved uint32_t value; lex.h defines C_EOF as this same value
+
+uint32_t utf8_decode(const char **s); // takes a pointer to the string pointer, so presumably advances *s — confirm
+size_t utf8_encode(char *s, uint32_t c); // presumably writes c to s and returns the number of bytes — confirm
+uint32_t utf8_get(FILE *f); // presumably reads one code point from f — confirm
+
+#endif
diff --git a/include/util.h b/include/util.h
new file mode 100644
index 0000000..2a1e72a
--- /dev/null
+++ b/include/util.h
@@ -0,0 +1,9 @@
+#ifndef CERC_UTIL_H
+#define CERC_UTIL_H
+enum exit_status { /* exit codes that supplement stdlib.h's EXIT_SUCCESS */
+ /* EXIT_SUCCESS = 0 (defined in stdlib.h) */
+ EXIT_USER = 1, /* presumably a failure attributable to user input - confirm with callers */
+ EXIT_ABNORMAL = 255, /* presumably an internal or unexpected failure - confirm with callers */
+};
+
+#endif