// include/lex.h (blob 23619d5b3ae5b1e37293112ae6da14a2fd3046d6)
#ifndef CERC_LEX_H
#define CERC_LEX_H
#include <stdio.h>
#include "utf8.h"

// Alias for UTF8_INVALID (provided by utf8.h).
// NOTE(review): judging by the name, this is presumably the value the lexer
// treats as "no more input" when reading code points - confirm in lex.c.
#define C_EOF UTF8_INVALID

// Every kind of token known to the lexer.
//
// No enumerator has an explicit value, so each is numbered by its position,
// starting at zero with T_BREAK. Inserting, removing or reordering entries
// renumbers everything that follows.
// NOTE(review): the tokens[] table declared in this header is presumably
// indexed by these values - confirm it is kept in sync when editing this list.
enum lexical_token {
	// Keywords
	T_BREAK,
	T_CONST,
	T_CONTINUE,
	T_DEFER,
	T_ELSE,
	T_ENUM,
	T_EXPORT,
	T_EXTERN,
	T_FOR,
	T_FUNC,
	T_IF,
	T_IMPORT,
	T_LET,
	T_MUT,
	T_PUB,
	T_RETURN,
	T_STRUCT,
	T_UNION,
	// Sentinel, not a keyword itself: sits directly after the final keyword,
	// so its value is T_UNION + 1 (the number of keywords listed above).
	T_LAST_KEYWORD,

	// Builtin types
	T_F32,
	T_F64,
	T_I8,
	T_I16,
	T_I32,
	T_I64,
	T_ISIZE,
	T_U8,
	T_U16,
	T_U32,
	T_U64,
	T_USIZE,
	// Sentinel: sits directly after the final builtin type (T_USIZE + 1).
	T_LAST_BUILTIN_TYPE,

	// Operators
	// NOTE(review): this group also holds punctuation-style tokens (braces,
	// brackets, parentheses, comma, semicolon), not only arithmetic/logic
	// operators. The source spelling of each one is not visible in this header.
	T_ADD,
	T_AND,
	T_BAND,
	T_BANG,
	T_BNOT,
	T_BOR,
	T_BSHL,
	T_BSHR,
	T_BXOR,
	T_COLON,
	T_COMMA,
	T_DIV,
	T_DOT,
	T_EQ,
	T_GE,
	T_GT,
	T_LBRACE,
	T_LBRACKET,
	T_LE,
	T_LPAREN,
	T_LT,
	T_MINUS,
	T_MODULO,
	T_NEQ,
	T_OR,
	T_QUESTION,
	T_RBRACE,
	T_RBRACKET,
	T_RPAREN,
	T_SEMICOLON,
	T_STAR,
	T_UNDERSCORE,
	// Sentinel: sits directly after the final operator (T_UNDERSCORE + 1).
	T_LAST_OPERATOR,

	// Tokens with additional information
	// NOTE(review): presumably these are the kinds for which struct token's
	// info union is filled in - confirm against the lexer implementation.
	T_IDENT,
	T_NAME,
	T_NUMBER,
	T_STRING,

	// Magic values
	// NOTE(review): by name, T_EOF presumably reports end of input and T_NONE
	// presumably means "no token"; neither meaning is established here - confirm.
	T_EOF,
	T_NONE,
};

// Array of constant strings, defined in another translation unit; its length
// is not visible through this declaration.
// NOTE(review): presumably maps each enum lexical_token value to a printable
// name/spelling - confirm which enumerators have entries before indexing.
extern const char *tokens[];

// One token: its kind plus an optional payload.
struct token {
	// Which kind of token this is.
	enum lexical_token token;
	// Per-token payload; the union currently has a single member.
	union {
		// String payload.
		// NOTE(review): presumably meaningful only for the "tokens with
		// additional information" kinds (T_IDENT, T_NAME, T_NUMBER, T_STRING).
		// Ownership and lifetime of the pointed-to text are not visible
		// here - confirm before storing or freeing it.
		const char *str;
	} info;
};

// State carried by a lexer between calls.
// NOTE(review): uint32_t requires <stdint.h>, which this header does not
// include directly; it presumably arrives through utf8.h - confirm, or include
// it here so the header is self-contained.
struct lexer {
	// Input stream. Presumably the FILE * handed to lex_init - confirm.
	FILE *in;
	// Two 32-bit slots. Presumably buffered/pushed-back code points
	// (cf. C_EOF) - TODO confirm.
	uint32_t c[2];
	// Character buffer. Presumably scratch space for the text of the token
	// being read - confirm who allocates and frees it.
	char *buf;
	// Presumably the allocated size of buf and the number of bytes in use.
	size_t bufsz, buflen;
	// A stored token. Presumably the one handed back via unlex() - confirm.
	struct token un;
};

// Set-up and tear-down for a struct lexer; both are defined elsewhere.
//
// lex_init: takes the lexer to prepare and a stdio stream.
// NOTE(review): presumably stores f as the input source and resets the other
// fields - confirm, including whether the lexer takes ownership of f.
void lex_init(struct lexer *lexer, FILE *f);
// lex_finish: takes the lexer to tear down.
// NOTE(review): presumably releases resources held by the lexer (e.g. buf);
// whether it also closes the stream is not visible here - confirm.
void lex_finish(struct lexer *lexer);

// lex: takes a lexer and a token to write into, and returns a token kind.
// NOTE(review): presumably reads the next token, fills *out, and returns the
// same kind that was stored in out->token - confirm in the implementation.
enum lexical_token lex(struct lexer *lexer, struct token *out);
// unlex: takes a lexer and a token that it does not modify (const).
// NOTE(review): presumably pushes *in back so the next lex() call returns it;
// struct lexer has a single `un` slot, so likely only one token can be
// pending at a time - confirm.
void unlex(struct lexer *lexer, const struct token *in);

#endif