// Lexer interface: token kinds, source locations, token records, and lexer state.
#ifndef CERC_LEX_H
#define CERC_LEX_H
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include "utf8.h"
// Alias for UTF8_INVALID (expected to come from utf8.h). Going by the name,
// presumably the sentinel character value used for end of input -- confirm
// against the lexer implementation.
#define C_EOF UTF8_INVALID
// Every kind of token the lexer can report.
//
// Enumerators are laid out in contiguous groups, and the numeric values follow
// purely from position, so entries must not be reordered. Each T_LAST_* marker
// is an enumerator of its own whose value is one greater than the final member
// of the group it closes.
enum lexical_token {
	// Keywords
	T_BREAK = 0,
	T_CONST,
	T_CONTINUE,
	T_DEFER,
	T_ELSE,
	T_EXTERN,
	T_FALSE,
	T_FOR,
	T_FUNC,
	T_IF,
	T_INCLUDE,
	T_LET,
	T_RETURN,
	T_STRUCT,
	T_TRUE,
	T_UNION,
	T_VAR,
	T_LAST_KEYWORD,

	// Builtin types
	T_BOOL,
	T_FLOAT32,
	T_FLOAT64,
	T_INT8,
	T_INT16,
	T_INT32,
	T_INT64,
	T_INT,
	T_UINT8,
	T_UINT16,
	T_UINT32,
	T_UINT64,
	T_UINT,
	T_LAST_BUILTIN_TYPE,

	// Operators
	T_ADD,
	T_AND,
	T_ASSIGN,
	T_BAND,
	T_BANG,
	T_BNOT,
	T_BOR,
	T_BSHL,
	T_BSHR,
	T_BXOR,
	T_COLON,
	T_COMMA,
	T_DIV,
	T_DOT,
	T_EQ,
	T_GE,
	T_GT,
	T_LBRACE,
	T_LBRACKET,
	T_LE,
	T_LPAREN,
	T_LT,
	T_MINUS,
	T_MODDELIM,
	T_MODULO,
	T_NEQ,
	T_OR,
	T_QUESTION,
	T_RBRACE,
	T_RBRACKET,
	T_RPAREN,
	T_SEMICOLON,
	T_STAR,
	T_UNDERSCORE,
	T_LAST_OPERATOR,

	// Tokens with additional information
	T_COMMENT,
	T_NAME,
	T_NUMBER,
	T_STRING,

	// Magic values
	T_EOF,
	T_NONE,
};
// Array of C strings whose definition lives outside this header. Presumably
// holds a printable spelling per token and is indexed by enum lexical_token
// -- TODO confirm against the definition.
extern const char *tokens[];
// Value of a numeric literal, stored as either an unsigned 64-bit integer or
// a double sharing the same storage.
struct number {
	// Discriminant for `value`; presumably true means `floatingpt` is the
	// live member and false means `integer` -- confirm against the lexer.
	bool isfloat;
	union {
		uint64_t integer;
		double floatingpt;
	} value;
};
// A position in a source file.
struct location {
	const char *file;	// file name; the struct does not own or copy it here
	int line;
	int column;
};
// A single token: its kind, its source location, and an optional payload.
struct token {
	enum lexical_token token;
	struct location loc;
	// Payload for the token kinds that carry extra information. Presumably
	// `num` accompanies T_NUMBER and `str` the textual kinds (T_NAME,
	// T_STRING, T_COMMENT) -- confirm against the lexer implementation.
	union {
		struct number num;
		const char *str;
	} info;
};
// State carried by a lexer between calls. The notes on individual fields are
// inferred from names and types only; this header does not show how they are
// used, so verify against the implementation before relying on them.
struct lexer {
	FILE *in;		// input stream
	uint32_t c[2];		// two character slots; presumably lookahead/pushback
	char *buf;		// presumably scratch storage for the token being read
	size_t bufsz;		// presumably the capacity of buf
	size_t buflen;		// presumably the number of bytes of buf in use
	struct token un;	// presumably the token handed back through unlex()
	struct location loc;	// presumably the current read position
};
// Lexer entry points. Only the declarations are visible in this header, so
// the behavioral notes below are inferred from names and signatures and
// should be confirmed against the implementation.

// Prepares `lexer` for use with stream `f`; `filename` is presumably recorded
// for reporting source locations.
void lex_init(struct lexer *lexer, FILE *f, const char *filename);
// Counterpart to lex_init(); presumably releases whatever the lexer holds.
void lex_finish(struct lexer *lexer);
// Returns a struct location by value; presumably the lexer's current position.
struct location lex_loc(struct lexer *lexer);
// Produces a token through `out` and returns an enum lexical_token, presumably
// the kind of that token.
enum lexical_token lex(struct lexer *lexer, struct token *out);
// Same signature as lex(). NOTE(review): how it differs from lex() cannot be
// determined from this header.
enum lexical_token lex_any(struct lexer *lexer, struct token *out);
// Takes a token the lexer will not modify; presumably pushes it back so that
// the next read yields it again.
void unlex(struct lexer *lexer, const struct token *in);
// Returns a bool; presumably reports whether the next token has kind `token`.
// Whether a matching token is consumed is not visible here.
bool match(struct lexer *lexer, enum lexical_token token);
#endif // CERC_LEX_H