1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
#ifndef CERC_LEX_H
#define CERC_LEX_H
#include <stdint.h>
#include <stdio.h>
#include "utf8.h"
// C_EOF is an alias for UTF8_INVALID, which is expected to come from utf8.h.
// NOTE(review): presumably the value the lexer uses in place of a code point
// once the input is exhausted — confirm against the lexer implementation.
#define C_EOF UTF8_INVALID
// Every kind of token the lexer can report.
//
// Enumerators take their implicit values (starting at 0) from declaration
// order. They are laid out in contiguous groups, and each of the first three
// groups is closed by a T_LAST_* marker placed directly after the group's
// final member, so a marker's value is one greater than that member's.
//
// NOTE(review): the tokens[] table declared in this header is presumably kept
// in this same order — confirm before inserting or reordering entries.
enum lexical_token {
	// Keywords
	T_BREAK,
	T_CONST,
	T_CONTINUE,
	T_DEFER,
	T_ELSE,
	T_ENUM,
	T_EXPORT,
	T_EXTERN,
	T_FOR,
	T_FUNC,
	T_IF,
	T_IMPORT,
	T_LET,
	T_MUT,
	T_PUB,
	T_RETURN,
	T_STRUCT,
	T_UNION,
	T_LAST_KEYWORD, // marker: follows the final keyword

	// Builtin types
	T_F32,
	T_F64,
	T_I8,
	T_I16,
	T_I32,
	T_I64,
	T_ISIZE,
	T_U8,
	T_U16,
	T_U32,
	T_U64,
	T_USIZE,
	T_LAST_BUILTIN_TYPE, // marker: follows the final builtin type

	// Operators
	T_ADD,
	T_AND,
	T_BAND,
	T_BANG,
	T_BNOT,
	T_BOR,
	T_BSHL,
	T_BSHR,
	T_BXOR,
	T_COLON,
	T_COMMA,
	T_DIV,
	T_DOT,
	T_EQ,
	T_GE,
	T_GT,
	T_LBRACE,
	T_LBRACKET,
	T_LE,
	T_LPAREN,
	T_LT,
	T_MINUS,
	T_MODULO,
	T_NEQ,
	T_OR,
	T_QUESTION,
	T_RBRACE,
	T_RBRACKET,
	T_RPAREN,
	T_SEMICOLON,
	T_STAR,
	T_UNDERSCORE,
	T_LAST_OPERATOR, // marker: follows the final operator

	// Tokens with additional information
	T_IDENT,
	T_NAME,
	T_NUMBER,
	T_STRING,

	// Magic values
	T_EOF,
	T_NONE,
};
// Array of C strings defined in another translation unit.
// NOTE(review): presumably indexed by enum lexical_token to obtain a token's
// spelling; its length is not visible from this header — confirm against the
// definition before indexing with the T_LAST_* markers or later enumerators.
extern const char *tokens[];
// One lexed token: its kind, plus the payload that accompanies some kinds.
struct token {
	// Which kind of token this is.
	enum lexical_token token;

	// Payload. The union currently has a single string member.
	// NOTE(review): presumably meaningful only for the kinds that carry
	// extra information (T_IDENT, T_NAME, T_NUMBER, T_STRING); who owns
	// the pointed-to string is not visible from this header — confirm.
	union {
		const char *str;
	} info;
};
// State a lexer carries between calls to the functions declared in this
// header.
struct lexer {
	// Input stream. NOTE(review): presumably the FILE handed to
	// lex_init() — confirm.
	FILE *in;

	// Two 32-bit slots. NOTE(review): presumably buffered / pushed-back
	// code points, with C_EOF marking an unused slot — confirm.
	uint32_t c[2];

	// Character buffer and its bookkeeping. NOTE(review): bufsz and
	// buflen are presumably the allocated capacity and the number of
	// bytes in use, respectively — confirm.
	char *buf;
	size_t bufsz;
	size_t buflen;

	// A stored token. NOTE(review): presumably the token handed back via
	// unlex(), to be returned by the next lex() — confirm.
	struct token un;
};
// Lexer entry points. Only the declarations are visible in this header, so
// the behavior notes below are assumptions to confirm against the
// implementation.

// Sets up `lexer` for the stream `f`.
// NOTE(review): presumably records `f` in lexer->in and resets the remaining
// fields — confirm.
void lex_init(struct lexer *lexer, FILE *f);

// Tears down a lexer.
// NOTE(review): presumably releases lexer->buf; whether the FILE is closed
// here is not visible — confirm.
void lex_finish(struct lexer *lexer);

// Produces a token through `out` and returns a token kind.
// NOTE(review): the return value presumably mirrors out->token — confirm.
enum lexical_token lex(struct lexer *lexer, struct token *out);

// Gives the token `in` back to the lexer; `in` is not modified through this
// pointer (it is const-qualified).
// NOTE(review): presumably stored in lexer->un and returned by the next
// lex() call; only one token of push-back storage is declared — confirm.
void unlex(struct lexer *lexer, const struct token *in);
#endif
|