1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
#ifndef CERC_LEX_H
#define CERC_LEX_H
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include "utf8.h"
// Alias for UTF8_INVALID (expected to come from "utf8.h").
// NOTE(review): presumably the value the lexer uses for "no more code points"
// in its lookahead -- confirm against the lexer implementation.
#define C_EOF UTF8_INVALID
// Every kind of token the lexer can report.
//
// Enumerators are grouped (keywords, builtin types, operators, tokens with
// additional information, magic values). The first three groups are each
// terminated by a T_LAST_* marker enumerator.
//
// All values are implicit, so inserting, removing or reordering an enumerator
// changes the numeric value of everything after it.
// NOTE(review): the T_LAST_* markers look like range bounds for classifying a
// token by group, and the `tokens` array declared in this header is presumably
// indexed by these values -- confirm in the implementation before editing.
enum lexical_token {
// Keywords
T_BREAK,
T_CONST,
T_CONTINUE,
T_DEFER,
T_ELSE,
T_ENUM,
T_EXPORT,
T_EXTERN,
T_FALSE,
T_FOR,
T_FUNC,
T_IF,
T_IMPORT,
T_LET,
T_MUT,
T_PUB,
T_RETURN,
T_STRUCT,
T_TRUE,
T_UNION,
// Marker placed directly after the last keyword enumerator.
T_LAST_KEYWORD,
// Builtin types
T_FLOAT32,
T_FLOAT64,
T_INT8,
T_INT16,
T_INT32,
T_INT64,
T_INT,
T_UINT8,
T_UINT16,
T_UINT32,
T_UINT64,
T_UINT,
// Marker placed directly after the last builtin-type enumerator.
T_LAST_BUILTIN_TYPE,
// Operators
T_ADD,
T_AND,
T_ASSIGN,
T_BAND,
T_BANG,
T_BNOT,
T_BOR,
T_BSHL,
T_BSHR,
T_BXOR,
T_COLON,
T_COMMA,
T_DIV,
T_DOT,
T_EQ,
T_GE,
T_GT,
T_LBRACE,
T_LBRACKET,
T_LE,
T_LPAREN,
T_LT,
T_MINUS,
T_MODDELIM,
T_MODULO,
T_NEQ,
T_OR,
T_QUESTION,
T_RBRACE,
T_RBRACKET,
T_RPAREN,
T_SEMICOLON,
T_STAR,
T_UNDERSCORE,
// Marker placed directly after the last operator enumerator.
T_LAST_OPERATOR,
// Tokens with additional information
// NOTE(review): presumably these are the kinds for which `struct token.info`
// is filled in -- confirm in the lexer.
T_COMMENT,
T_NAME,
T_NUMBER,
T_STRING,
// Magic values
T_EOF,
T_NONE,
};
// Array of C strings defined in another translation unit. It is declared here
// with no bound, so its length cannot be taken with sizeof through this header.
// NOTE(review): presumably indexed by enum lexical_token to give each token's
// spelling; which enumerators have an entry is not visible here -- confirm.
extern const char *tokens[];
// A numeric value stored either as an unsigned 64-bit integer or as a double.
// Both representations share storage in `value`.
struct number {
// NOTE(review): presumably the discriminant for `value` (true -> floatingpt,
// false -> integer) -- confirm where the lexer fills this in.
bool isfloat;
union {
uint64_t integer;
double floatingpt;
} value;
};
// One token: its kind plus an optional payload.
struct token {
// Which kind of token this is.
enum lexical_token token;
// Payload; the members share storage, so at most one is meaningful at a time.
// NOTE(review): presumably `num` goes with T_NUMBER and `str` with
// T_NAME / T_STRING / T_COMMENT. Who owns the memory behind `str` is not
// visible in this header -- confirm in the implementation.
union {
struct number num;
const char *str;
} info;
};
// Lexer state. Every function declared in this header takes a pointer to one.
struct lexer {
// Input stream (the FILE * handed to lex_init, presumably).
FILE *in;
// Two 32-bit slots.
// NOTE(review): presumably buffered/lookahead code points, with C_EOF
// marking an empty slot -- confirm.
uint32_t c[2];
// NOTE(review): presumably a growable scratch buffer for the text of the
// token being read, with `bufsz` its capacity and `buflen` the bytes in
// use -- confirm.
char *buf;
size_t bufsz, buflen;
// NOTE(review): presumably the token pushed back by unlex(), to be returned
// by the next lex() -- confirm.
struct token un;
};
// Set up `lexer` for use with the stream `f`.
// NOTE(review): whether the lexer takes ownership of `f` (i.e. closes it in
// lex_finish) is not visible in this header -- confirm.
void lex_init(struct lexer *lexer, FILE *f);
// Counterpart to lex_init.
// NOTE(review): presumably releases resources held by `lexer` (e.g. its
// buffer); exactly what is freed or closed is not visible here -- confirm.
void lex_finish(struct lexer *lexer);
// Produce a token from `lexer`, writing through `out` and returning a token kind.
// NOTE(review): presumably the return value equals out->token and T_EOF
// signals end of input -- confirm in the implementation.
enum lexical_token lex(struct lexer *lexer, struct token *out);
// Hand the token `in` back to `lexer`; `in` is not modified (const).
// NOTE(review): presumably the next lex() call returns this token again, and
// only one token can be pushed back at a time (struct lexer has a single `un`
// slot) -- confirm.
void unlex(struct lexer *lexer, const struct token *in);
// Test the lexer's input against the token kind `token`, reporting the result
// as a bool.
// NOTE(review): presumably consumes the next token when it matches and leaves
// it in place otherwise -- confirm in the implementation.
bool match(struct lexer *lexer, enum lexical_token token);
#endif
|